diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h index cdbf02d9c1d4..95d5b0d625cd 100644 --- a/arch/arm/include/asm/paravirt.h +++ b/arch/arm/include/asm/paravirt.h @@ -3,23 +3,19 @@ #define _ASM_ARM_PARAVIRT_H #ifdef CONFIG_PARAVIRT +#include + struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops { - unsigned long long (*steal_clock)(int cpu); -}; +u64 dummy_steal_clock(int cpu); -struct paravirt_patch_template { - struct pv_time_ops time; -}; - -extern struct paravirt_patch_template pv_ops; +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); static inline u64 paravirt_steal_clock(int cpu) { - return pv_ops.time.steal_clock(cpu); + return static_call(pv_steal_clock)(cpu); } #endif diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c index 4cfed91fe256..7dd9806369fb 100644 --- a/arch/arm/kernel/paravirt.c +++ b/arch/arm/kernel/paravirt.c @@ -9,10 +9,15 @@ #include #include #include +#include #include struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; -struct paravirt_patch_template pv_ops; -EXPORT_SYMBOL_GPL(pv_ops); +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index cf3a0fd7c1a7..9aa193e0e8f2 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -3,23 +3,19 @@ #define _ASM_ARM64_PARAVIRT_H #ifdef CONFIG_PARAVIRT +#include + struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops { - unsigned long long (*steal_clock)(int cpu); -}; +u64 dummy_steal_clock(int cpu); -struct paravirt_patch_template { - struct pv_time_ops time; -}; - -extern struct paravirt_patch_template pv_ops; +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); static inline u64 paravirt_steal_clock(int cpu) { - return pv_ops.time.steal_clock(cpu); + return static_call(pv_steal_clock)(cpu); } int __init pv_time_init(void); diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index c07d7a034941..75fed4460407 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -26,8 +27,12 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; -struct paravirt_patch_template pv_ops; -EXPORT_SYMBOL_GPL(pv_ops); +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); struct pv_time_stolen_time_region { struct pvclock_vcpu_stolen_time *kaddr; @@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); /* return stolen time in ns by asking the hypervisor */ -static u64 pv_steal_clock(int cpu) +static u64 para_steal_clock(int cpu) { struct pv_time_stolen_time_region *reg; @@ -150,7 +155,7 @@ int __init pv_time_init(void) if (ret) return ret; - pv_ops.time.steal_clock = pv_steal_clock; + static_call_update(pv_steal_clock, para_steal_clock); static_key_slow_inc(¶virt_steal_enabled); if (steal_acc) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d0db8cc4293e..513895af8ee7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -774,6 +774,7 @@ if HYPERVISOR_GUEST config PARAVIRT bool "Enable paravirtualization code" + depends on HAVE_STATIC_CALL help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bee9101e211e..8096b861e424 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include @@ -349,7 +349,7 @@ * will soon execute iret and the tracer was already set to * the irqstate after the IRET: */ - DISABLE_INTERRUPTS(CLBR_ANY) + cli lss (%esp), %esp /* switch to espfix segment */ .Lend_\@: #endif /* CONFIG_X86_ESPFIX32 */ @@ -994,7 +994,7 @@ restore_all_switch_stack: * when returning from IPI handler and when returning from * scheduler to user-space. */ - INTERRUPT_RETURN + iret .section .fixup, "ax" SYM_CODE_START(asm_iret_error) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 400908dff42e..12e2e3cd58be 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY pushq %rax - SAVE_FLAGS(CLBR_RAX) + SAVE_FLAGS testl $X86_EFLAGS_IF, %eax jz .Lokay_\@ ud2 diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index de1fff7188aa..d6a6080bade0 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -6,7 +6,7 @@ #include #include #include -#include +#include .text .globl __kernel_vsyscall diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h deleted file mode 100644 index 464034db299f..000000000000 --- a/arch/x86/include/asm/alternative-asm.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_ALTERNATIVE_ASM_H -#define _ASM_X86_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -#include - -#ifdef CONFIG_SMP - .macro LOCK_PREFIX -672: lock - .pushsection .smp_locks,"a" - .balign 4 - .long 672b - . - .popsection - .endm -#else - .macro LOCK_PREFIX - .endm -#endif - -/* - * objtool annotation to ignore the alternatives and only consider the original - * instruction(s). - */ -.macro ANNOTATE_IGNORE_ALTERNATIVE - .Lannotate_\@: - .pushsection .discard.ignore_alts - .long .Lannotate_\@ - . - .popsection -.endm - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len - .long \orig - . - .long \alt - . - .word \feature - .byte \orig_len - .byte \alt_len - .byte \pad_len -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. ".skip" directive takes care of proper instruction padding - * in case @newinstr is longer than @oldinstr. - */ -.macro ALTERNATIVE oldinstr, newinstr, feature -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: - .popsection -.endm - -#define old_len 141b-140b -#define new_len1 144f-143f -#define new_len2 145f-144f - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) - - -/* - * Same as ALTERNATIVE macro above but for two alternatives. If CPU - * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has - * @feature2, it replaces @oldinstr with @feature2. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 -140: - \oldinstr -141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13adca37c99a..17b36090d448 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -2,13 +2,17 @@ #ifndef _ASM_X86_ALTERNATIVE_H #define _ASM_X86_ALTERNATIVE_H -#ifndef __ASSEMBLY__ - #include -#include #include #include +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +#ifndef __ASSEMBLY__ + +#include + /* * Alternative inline assembly for SMP. * @@ -150,7 +154,7 @@ static inline int alternatives_text_reserved(void *start, void *end) " .byte " alt_rlen(num) "\n" /* replacement len */ \ " .byte " alt_pad_len "\n" /* pad len */ -#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ +#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \ "# ALT: replacement " #num "\n" \ b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" @@ -161,7 +165,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ALTINSTR_ENTRY(feature, 1) \ ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ALTINSTR_REPLACEMENT(newinstr, 1) \ ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ @@ -171,10 +175,15 @@ static inline int alternatives_text_reserved(void *start, void *end) ALTINSTR_ENTRY(feature2, 2) \ ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ - ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ALTINSTR_REPLACEMENT(newinstr1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, 2) \ ".popsection\n" +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, feature) + #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ OLDINSTR_3(oldinsn, 1, 2, 3) \ ".pushsection .altinstructions,\"a\"\n" \ @@ -183,9 +192,9 @@ static inline int alternatives_text_reserved(void *start, void *end) ALTINSTR_ENTRY(feat3, 3) \ ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \ - ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \ - ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \ + ALTINSTR_REPLACEMENT(newinsn1, 1) \ + ALTINSTR_REPLACEMENT(newinsn2, 2) \ + ALTINSTR_REPLACEMENT(newinsn3, 3) \ ".popsection\n" /* @@ -206,6 +215,9 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") +#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \ + asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory") + /* * Alternative inline assembly with input. * @@ -271,6 +283,116 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .pushsection .smp_locks,"a" + .balign 4 + .long 672b - . + .popsection + .endm +#else + .macro LOCK_PREFIX + .endm +#endif + +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +.macro ANNOTATE_IGNORE_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.ignore_alts + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len pad_len + .long \orig - . + .long \alt - . + .word \feature + .byte \orig_len + .byte \alt_len + .byte \pad_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140: + \oldinstr +141: + .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr +144: + .popsection +.endm + +#define old_len 141b-140b +#define new_len1 144f-143f +#define new_len2 145f-144f + +/* + * gas compatible max based on the idea from: + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. + */ +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140: + \oldinstr +141: + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr1 +144: + \newinstr2 +145: + .popsection +.endm + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, feature + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1728d4ce5730..16a51e7288d5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -8,6 +8,7 @@ #include #include +#include enum cpuid_leafs { @@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" - "2:\n" - ".skip -(((5f-4f) - (2b-1b)) > 0) * " - "((5f-4f) - (2b-1b)),0x90\n" - "3:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 4f - .\n" /* repl offset */ - " .word %P[always]\n" /* always replace */ - " .byte 3b - 1b\n" /* src len */ - " .byte 5f - 4f\n" /* repl len */ - " .byte 3b - 2b\n" /* pad len */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "4: jmp %l[t_no]\n" - "5:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 0\n" /* no replacement */ - " .word %P[feature]\n" /* feature bit */ - " .byte 3b - 1b\n" /* src len */ - " .byte 0\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .altinstr_aux,\"ax\"\n" - "6:\n" - " testb %[bitnum],%[cap_byte]\n" - " jnz %l[t_yes]\n" - " jmp %l[t_no]\n" - ".previous\n" + asm_volatile_goto( + ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" : : [feature] "i" (bit), - [always] "i" (X86_FEATURE_ALWAYS), [bitnum] "i" (1 << (bit & 7)), [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cc96e26d69f7..b440c950246d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -236,6 +236,8 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 144d70ea4393..c5ce9845c999 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -109,18 +109,13 @@ static __always_inline unsigned long arch_local_irq_save(void) } #else -#define ENABLE_INTERRUPTS(x) sti -#define DISABLE_INTERRUPTS(x) cli - #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax +#define SAVE_FLAGS pushfq; popq %rax #endif #define INTERRUPT_RETURN jmp native_iret -#else -#define INTERRUPT_RETURN iret #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ccf60a809a17..e7be720062a8 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -63,7 +63,7 @@ typedef int (*hyperv_fill_flush_list_func)( static __always_inline void hv_setup_sched_clock(void *sched_clock) { #ifdef CONFIG_PARAVIRT - pv_ops.time.sched_clock = sched_clock; + paravirt_set_sched_clock(sched_clock); #endif } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index cb9ad6b73973..529f8e9380d8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,7 +7,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4abf110e2243..43992e5c52c2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -15,11 +15,20 @@ #include #include #include +#include #include -static inline unsigned long long paravirt_sched_clock(void) +u64 dummy_steal_clock(int cpu); +u64 dummy_sched_clock(void); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); +DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); + +void paravirt_set_sched_clock(u64 (*func)(void)); + +static inline u64 paravirt_sched_clock(void) { - return PVOP_CALL0(unsigned long long, time.sched_clock); + return static_call(pv_sched_clock)(); } struct static_key; @@ -33,9 +42,13 @@ bool pv_is_native_vcpu_is_preempted(void); static inline u64 paravirt_steal_clock(int cpu) { - return PVOP_CALL1(u64, time.steal_clock, cpu); + return static_call(pv_steal_clock)(cpu); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init paravirt_set_cap(void); +#endif + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { @@ -122,7 +135,9 @@ static inline void write_cr0(unsigned long x) static inline unsigned long read_cr2(void) { - return PVOP_CALLEE0(unsigned long, mmu.read_cr2); + return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, + "mov %%cr2, %%rax;", + ALT_NOT(X86_FEATURE_XENPV)); } static inline void write_cr2(unsigned long x) @@ -132,12 +147,14 @@ static inline void write_cr2(unsigned long x) static inline unsigned long __read_cr3(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr3); + return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, + "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV)); } static inline void write_cr3(unsigned long x) { - PVOP_VCALL1(mmu.write_cr3, x); + PVOP_ALT_VCALL1(mmu.write_cr3, x, + "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV)); } static inline void __write_cr4(unsigned long x) @@ -157,7 +174,7 @@ static inline void halt(void) static inline void wbinvd(void) { - PVOP_VCALL0(cpu.wbinvd); + PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV)); } static inline u64 paravirt_read_msr(unsigned msr) @@ -371,22 +388,28 @@ static inline void paravirt_release_p4d(unsigned long pfn) static inline pte_t __pte(pteval_t val) { - return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) }; + return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)) }; } static inline pteval_t pte_val(pte_t pte) { - return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte); + return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); } static inline pgd_t __pgd(pgdval_t val) { - return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) }; + return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)) }; } static inline pgdval_t pgd_val(pgd_t pgd) { - return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd); + return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION @@ -419,12 +442,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) static inline pmd_t __pmd(pmdval_t val) { - return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) }; + return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)) }; } static inline pmdval_t pmd_val(pmd_t pmd) { - return PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd); + return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); } static inline void set_pud(pud_t *pudp, pud_t pud) @@ -436,14 +462,16 @@ static inline pud_t __pud(pudval_t val) { pudval_t ret; - ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val); + ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { - return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud); + return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); } static inline void pud_clear(pud_t *pudp) @@ -462,14 +490,17 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) static inline p4d_t __p4d(p4dval_t val) { - p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val); + p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { - return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d); + return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) @@ -556,7 +587,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { - PVOP_VCALLEE1(lock.queued_spin_unlock, lock); + PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock, + "movb $0, (%%" _ASM_ARG1 ");", + ALT_NOT(X86_FEATURE_PVUNLOCK)); } static __always_inline void pv_wait(u8 *ptr, u8 val) @@ -571,7 +604,9 @@ static __always_inline void pv_kick(int cpu) static __always_inline bool pv_vcpu_is_preempted(long cpu) { - return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu); + return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu, + "xor %%" _ASM_AX ", %%" _ASM_AX ";", + ALT_NOT(X86_FEATURE_VCPUPREEMPT)); } void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); @@ -645,17 +680,18 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); #ifdef CONFIG_PARAVIRT_XXL static inline notrace unsigned long arch_local_save_flags(void) { - return PVOP_CALLEE0(unsigned long, irq.save_fl); + return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", + ALT_NOT(X86_FEATURE_XENPV)); } static inline notrace void arch_local_irq_disable(void) { - PVOP_VCALLEE0(irq.irq_disable); + PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV)); } static inline notrace void arch_local_irq_enable(void) { - PVOP_VCALLEE0(irq.irq_enable); + PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV)); } static inline notrace unsigned long arch_local_irq_save(void) @@ -700,84 +736,27 @@ extern void default_banner(void); .popsection -#define COND_PUSH(set, mask, reg) \ - .if ((~(set)) & mask); push %reg; .endif -#define COND_POP(set, mask, reg) \ - .if ((~(set)) & mask); pop %reg; .endif - #ifdef CONFIG_X86_64 - -#define PV_SAVE_REGS(set) \ - COND_PUSH(set, CLBR_RAX, rax); \ - COND_PUSH(set, CLBR_RCX, rcx); \ - COND_PUSH(set, CLBR_RDX, rdx); \ - COND_PUSH(set, CLBR_RSI, rsi); \ - COND_PUSH(set, CLBR_RDI, rdi); \ - COND_PUSH(set, CLBR_R8, r8); \ - COND_PUSH(set, CLBR_R9, r9); \ - COND_PUSH(set, CLBR_R10, r10); \ - COND_PUSH(set, CLBR_R11, r11) -#define PV_RESTORE_REGS(set) \ - COND_POP(set, CLBR_R11, r11); \ - COND_POP(set, CLBR_R10, r10); \ - COND_POP(set, CLBR_R9, r9); \ - COND_POP(set, CLBR_R8, r8); \ - COND_POP(set, CLBR_RDI, rdi); \ - COND_POP(set, CLBR_RSI, rsi); \ - COND_POP(set, CLBR_RDX, rdx); \ - COND_POP(set, CLBR_RCX, rcx); \ - COND_POP(set, CLBR_RAX, rax) +#ifdef CONFIG_PARAVIRT_XXL #define PARA_PATCH(off) ((off) / 8) #define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) -#else -#define PV_SAVE_REGS(set) \ - COND_PUSH(set, CLBR_EAX, eax); \ - COND_PUSH(set, CLBR_EDI, edi); \ - COND_PUSH(set, CLBR_ECX, ecx); \ - COND_PUSH(set, CLBR_EDX, edx) -#define PV_RESTORE_REGS(set) \ - COND_POP(set, CLBR_EDX, edx); \ - COND_POP(set, CLBR_ECX, ecx); \ - COND_POP(set, CLBR_EDI, edi); \ - COND_POP(set, CLBR_EAX, eax) -#define PARA_PATCH(off) ((off) / 4) -#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4) -#define PARA_INDIRECT(addr) *%cs:addr -#endif - -#ifdef CONFIG_PARAVIRT_XXL #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(PV_CPU_iret), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);) + ANNOTATE_RETPOLINE_SAFE; \ + ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \ + X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;") -#define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) - -#define ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) -#endif - -#ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT_XXL #ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +.macro PARA_IRQ_save_fl + PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), + ANNOTATE_RETPOLINE_SAFE; + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);) +.endm + +#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \ + ALT_NOT(X86_FEATURE_XENPV) #endif #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ @@ -800,5 +779,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } #endif + +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void paravirt_set_cap(void) +{ +} +#endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index de87087d3bde..9d1ddb7b4350 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -3,7 +3,6 @@ #define _ASM_X86_PARAVIRT_TYPES_H /* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_NONE 0 #define CLBR_EAX (1 << 0) #define CLBR_ECX (1 << 1) #define CLBR_EDX (1 << 2) @@ -15,7 +14,6 @@ #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) #define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) -#define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX #define CLBR_RCX CLBR_ECX @@ -32,12 +30,9 @@ #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ CLBR_RCX | CLBR_R8 | CLBR_R9) #define CLBR_RET_REG (CLBR_RAX) -#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) #endif /* X86_64 */ -#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) - #ifndef __ASSEMBLY__ #include @@ -73,19 +68,6 @@ struct pv_info { const char *name; }; -struct pv_init_ops { - /* - * Patch may replace one of the defined code sequences with - * arbitrary code, subject to the same register constraints. - * This generally means the code is not free to clobber any - * registers other than EAX. The patch function should return - * the number of bytes of code generated, as we nop pad the - * rest in generic code. - */ - unsigned (*patch)(u8 type, void *insn_buff, - unsigned long addr, unsigned len); -} __no_randomize_layout; - #ifdef CONFIG_PARAVIRT_XXL struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ @@ -95,11 +77,6 @@ struct pv_lazy_ops { } __no_randomize_layout; #endif -struct pv_time_ops { - unsigned long long (*sched_clock)(void); - unsigned long long (*steal_clock)(int cpu); -} __no_randomize_layout; - struct pv_cpu_ops { /* hooks for various privileged instructions */ void (*io_delay)(void); @@ -156,10 +133,6 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); - /* Normal iret. Jump to this with the standard iret stack - frame set up. */ - void (*iret)(void); - void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); #endif @@ -290,8 +263,6 @@ struct pv_lock_ops { * number for each function using the offset which we use to indicate * what to patch. */ struct paravirt_patch_template { - struct pv_init_ops init; - struct pv_time_ops time; struct pv_cpu_ops cpu; struct pv_irq_ops irq; struct pv_mmu_ops mmu; @@ -300,6 +271,7 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; +extern void (*paravirt_iret)(void); #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) @@ -331,11 +303,7 @@ extern struct paravirt_patch_template pv_ops; /* Simple instruction patching code. */ #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" -unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len); -unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len); -unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end); - -unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len); +unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len); int paravirt_disable_iospace(void); @@ -414,11 +382,9 @@ int paravirt_disable_iospace(void); * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS \ +#define PVOP_CALL_ARGS \ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) @@ -434,12 +400,10 @@ int paravirt_disable_iospace(void); #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ /* [re]ax isn't an arg, but the return val */ -#define PVOP_VCALL_ARGS \ +#define PVOP_CALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ __edx = __edx, __ecx = __ecx, __eax = __eax; -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) @@ -464,152 +428,138 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)pv_ops.op) #endif -#define PVOP_RETMASK(rettype) \ +#define PVOP_RETVAL(rettype) \ ({ unsigned long __mask = ~0UL; \ + BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \ switch (sizeof(rettype)) { \ case 1: __mask = 0xffUL; break; \ case 2: __mask = 0xffffUL; break; \ case 4: __mask = 0xffffffffUL; break; \ default: break; \ } \ - __mask; \ + __mask & __eax; \ }) -#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ - pre, post, ...) \ +#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...) \ ({ \ - rettype __ret; \ PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - /* This is 32-bit specific, but is okay in 64-bit */ \ - /* since this condition will never hold */ \ - if (sizeof(rettype) > sizeof(unsigned long)) { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)((((u64)__edx) << 32) | __eax); \ - } else { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ - } \ - __ret; \ - }) - -#define __PVOP_CALL(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ - EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) - -#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_CALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - -#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ - ({ \ - PVOP_VCALL_ARGS; \ - PVOP_TEST_NULL(op); \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ + ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ - ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ - VEXTRA_CLOBBERS, \ - pre, post, ##__VA_ARGS__) +#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \ + extra_clbr, ...) \ + ({ \ + PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ + asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \ + alt, cond) \ + : call_clbr, ASM_CALL_CONSTRAINT \ + : paravirt_type(op), \ + paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + ret; \ + }) -#define __PVOP_VCALLEESAVE(op, pre, post, ...) \ - ____PVOP_VCALL(op.func, CLBR_RET_REG, \ - PVOP_VCALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) +#define __PVOP_CALL(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY, \ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__) +#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \ + ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \ + CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + + +#define __PVOP_VCALL(op, ...) \ + (void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, ##__VA_ARGS__) + +#define __PVOP_ALT_VCALL(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY, \ + PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \ + ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(op, ...) \ + (void)____PVOP_CALL(, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) #define PVOP_CALL0(rettype, op) \ - __PVOP_CALL(rettype, op, "", "") + __PVOP_CALL(rettype, op) #define PVOP_VCALL0(op) \ - __PVOP_VCALL(op, "", "") + __PVOP_VCALL(op) +#define PVOP_ALT_CALL0(rettype, op, alt, cond) \ + __PVOP_ALT_CALL(rettype, op, alt, cond) +#define PVOP_ALT_VCALL0(op, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond) #define PVOP_CALLEE0(rettype, op) \ - __PVOP_CALLEESAVE(rettype, op, "", "") + __PVOP_CALLEESAVE(rettype, op) #define PVOP_VCALLEE0(op) \ - __PVOP_VCALLEESAVE(op, "", "") + __PVOP_VCALLEESAVE(op) +#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond) +#define PVOP_ALT_VCALLEE0(op, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond) #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1)) #define PVOP_CALLEE1(rettype, op, arg1) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1)) #define PVOP_VCALLEE1(op, arg1) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1)) #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - -#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALLEE2(op, arg1, arg2) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -/* This is the only difference in x86_64. We can make it much simpler */ -#ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) -#else -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", \ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#endif /* Lazy mode for batching updates / context switch */ enum paravirt_lazy_mode { diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 0bc9b0895f33..d17b39893b79 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -11,6 +11,7 @@ #include #include +#include /* "Raw" instruction opcodes */ #define __ASM_CLAC ".byte 0x0f,0x01,0xca" @@ -18,8 +19,6 @@ #ifdef __ASSEMBLY__ -#include - #ifdef CONFIG_X86_SMAP #define ASM_CLAC \ @@ -37,8 +36,6 @@ #else /* __ASSEMBLY__ */ -#include - #ifdef CONFIG_X86_SMAP static __always_inline void clac(void) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2ddf08351f0b..0704c2a94272 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o := n KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD_test_nx.o := y -OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y ifdef CONFIG_FRAME_POINTER OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y @@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ff359b3a30e7..f902f28d9dff 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -28,6 +28,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -388,21 +389,31 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { int insn_buff_sz = 0; + /* Mask away "NOT" flag bit for feature to test. */ + u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->instrlen > sizeof(insn_buff)); - BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); - if (!boot_cpu_has(a->cpuid)) { + BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32); + + /* + * Patch if either: + * - feature is present + * - feature not present but ALTINSTR_FLAG_INV is set to mean, + * patch if feature is *NOT* present. + */ + if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) { if (a->padlen > 1) optimize_nops(a, instr); continue; } - DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", - a->cpuid >> 5, - a->cpuid & 0x1f, + DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", + (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "", + feature >> 5, + feature & 0x1f, instr, instr, a->instrlen, replacement, a->replacementlen, a->padlen); @@ -605,7 +616,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ memcpy(insn_buff, p->instr, p->len); - used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len); + used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len); BUG_ON(used > p->len); @@ -723,6 +734,33 @@ void __init alternative_instructions(void) * patching. */ + /* + * Paravirt patching and alternative patching can be combined to + * replace a function call with a short direct code sequence (e.g. + * by setting a constant return value instead of doing that in an + * external function). + * In order to make this work the following sequence is required: + * 1. set (artificial) features depending on used paravirt + * functions which can later influence alternative patching + * 2. apply paravirt patching (generally replacing an indirect + * function call with a direct one) + * 3. apply alternative patching (e.g. replacing a direct function + * call with a custom code sequence) + * Doing paravirt patching after alternative patching would clobber + * the optimization of the custom code with a function call again. + */ + paravirt_set_cap(); + + /* + * First patch paravirt functions, such that we overwrite the indirect + * call with the direct call. + */ + apply_paravirt(__parainstructions, __parainstructions_end); + + /* + * Then patch alternatives, such that those paravirt calls that are in + * alternatives can be overwritten by their immediate fragments. + */ apply_alternatives(__alt_instructions, __alt_instructions_end); #ifdef CONFIG_SMP @@ -741,8 +779,6 @@ void __init alternative_instructions(void) } #endif - apply_paravirt(__parainstructions, __parainstructions_end); - restart_nmi(); alternatives_patched = 1; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 60b9f42ce3c1..ecd3fd6993d1 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -61,13 +61,6 @@ static void __used common(void) OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); #endif -#ifdef CONFIG_PARAVIRT_XXL - BLANK(); - OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable); - OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable); - OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret); -#endif - #ifdef CONFIG_XEN BLANK(); OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index c6ede3b3d302..84fb8e3f3d1b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -336,11 +337,11 @@ static void __init vmware_paravirt_ops_setup(void) vmware_cyc2ns_setup(); if (vmw_sched_clock) - pv_ops.time.sched_clock = vmware_sched_clock; + paravirt_set_sched_clock(vmware_sched_clock); if (vmware_is_stealclock_available()) { has_steal_clock = true; - pv_ops.time.steal_clock = vmware_steal_clock; + static_call_update(pv_steal_clock, vmware_steal_clock); /* We use reboot notifier only to disable steal clock */ register_reboot_notifier(&vmware_pv_reboot_nb); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5e78e01ca3b4..351ba99f6009 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -650,7 +650,7 @@ static void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; - pv_ops.time.steal_clock = kvm_steal_clock; + static_call_update(pv_steal_clock, kvm_steal_clock); } if (pv_tlb_flush_supported()) { diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1fc0962c89c0..d37ed4e1d033 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -106,7 +106,7 @@ static inline void kvm_sched_clock_init(bool stable) if (!stable) clear_sched_clock_stable(); kvm_sched_clock_offset = kvm_clock_read(); - pv_ops.time.sched_clock = kvm_sched_clock_read; + paravirt_set_sched_clock(kvm_sched_clock_read); pr_info("kvm-clock: using sched offset of %llu cycles", kvm_sched_clock_offset); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 4f75d0cf6305..9e1ea99ad9df 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -32,3 +32,12 @@ bool pv_is_native_vcpu_is_preempted(void) return pv_ops.lock.vcpu_is_preempted.func == __raw_callee_save___native_vcpu_is_preempted; } + +void __init paravirt_set_cap(void) +{ + if (!pv_is_native_spin_unlock()) + setup_force_cpu_cap(X86_FEATURE_PVUNLOCK); + + if (!pv_is_native_vcpu_is_preempted()) + setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT); +} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c60222ab8ab9..d0730264786b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,10 @@ void __init default_banner(void) } /* Undefined instruction for dealing with missing ops pointers. */ -static const unsigned char ud2a[] = { 0x0f, 0x0b }; +static void paravirt_BUG(void) +{ + BUG(); +} struct branch { unsigned char opcode; @@ -85,25 +89,6 @@ u64 notrace _paravirt_ident_64(u64 x) { return x; } - -static unsigned paravirt_patch_jmp(void *insn_buff, const void *target, - unsigned long addr, unsigned len) -{ - struct branch *b = insn_buff; - unsigned long delta = (unsigned long)target - (addr+5); - - if (len < 5) { -#ifdef CONFIG_RETPOLINE - WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); -#endif - return len; /* call too long for patch site */ - } - - b->opcode = 0xe9; /* jmp */ - b->delta = delta; - - return 5; -} #endif DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); @@ -114,8 +99,8 @@ void __init native_pv_lock_init(void) static_branch_disable(&virt_spin_lock_key); } -unsigned paravirt_patch_default(u8 type, void *insn_buff, - unsigned long addr, unsigned len) +unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, + unsigned int len) { /* * Neat trick to map patch type back to the call within the @@ -125,20 +110,10 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned ret; if (opfunc == NULL) - /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a)); + /* If there's no function, patch it with paravirt_BUG() */ + ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len); else if (opfunc == _paravirt_nop) ret = 0; - -#ifdef CONFIG_PARAVIRT_XXL - /* identity functions just return their single argument */ - else if (opfunc == _paravirt_ident_64) - ret = paravirt_patch_ident_64(insn_buff, len); - - else if (type == PARAVIRT_PATCH(cpu.iret)) - /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len); -#endif else /* Otherwise call the function. */ ret = paravirt_patch_call(insn_buff, opfunc, addr, len); @@ -146,19 +121,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff, return ret; } -unsigned paravirt_patch_insns(void *insn_buff, unsigned len, - const char *start, const char *end) -{ - unsigned insn_len = end - start; - - /* Alternative instruction is too large for the patch site and we cannot continue: */ - BUG_ON(insn_len > len || start == NULL); - - memcpy(insn_buff, start, insn_len); - - return insn_len; -} - struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -167,6 +129,14 @@ static u64 native_steal_clock(int cpu) return 0; } +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); +DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock); + +void paravirt_set_sched_clock(u64 (*func)(void)) +{ + static_call_update(pv_sched_clock, func); +} + /* These are in entry.S */ extern void native_iret(void); @@ -269,13 +239,6 @@ struct pv_info pv_info = { #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) struct paravirt_patch_template pv_ops = { - /* Init ops. */ - .init.patch = native_patch, - - /* Time ops. */ - .time.sched_clock = native_sched_clock, - .time.steal_clock = native_steal_clock, - /* Cpu ops. */ .cpu.io_delay = native_io_delay, @@ -308,8 +271,6 @@ struct paravirt_patch_template pv_ops = { .cpu.load_sp0 = native_load_sp0, - .cpu.iret = native_iret, - #ifdef CONFIG_X86_IOPL_IOPERM .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, .cpu.update_io_bitmap = native_tss_update_io_bitmap, @@ -414,6 +375,8 @@ struct paravirt_patch_template pv_ops = { NOKPROBE_SYMBOL(native_get_debugreg); NOKPROBE_SYMBOL(native_set_debugreg); NOKPROBE_SYMBOL(native_load_idt); + +void (*paravirt_iret)(void) = native_iret; #endif EXPORT_SYMBOL(pv_ops); diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c deleted file mode 100644 index abd27ec67397..000000000000 --- a/arch/x86/kernel/paravirt_patch.c +++ /dev/null @@ -1,99 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -#include -#include - -#define PSTART(d, m) \ - patch_data_##d.m - -#define PEND(d, m) \ - (PSTART(d, m) + sizeof(patch_data_##d.m)) - -#define PATCH(d, m, insn_buff, len) \ - paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m)) - -#define PATCH_CASE(ops, m, data, insn_buff, len) \ - case PARAVIRT_PATCH(ops.m): \ - return PATCH(data, ops##_##m, insn_buff, len) - -#ifdef CONFIG_PARAVIRT_XXL -struct patch_xxl { - const unsigned char irq_irq_disable[1]; - const unsigned char irq_irq_enable[1]; - const unsigned char irq_save_fl[2]; - const unsigned char mmu_read_cr2[3]; - const unsigned char mmu_read_cr3[3]; - const unsigned char mmu_write_cr3[3]; - const unsigned char cpu_wbinvd[2]; - const unsigned char mov64[3]; -}; - -static const struct patch_xxl patch_data_xxl = { - .irq_irq_disable = { 0xfa }, // cli - .irq_irq_enable = { 0xfb }, // sti - .irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax - .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax - .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax - .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3 - .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd - .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax -}; - -unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len) -{ - return PATCH(xxl, mov64, insn_buff, len); -} -# endif /* CONFIG_PARAVIRT_XXL */ - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -struct patch_lock { - unsigned char queued_spin_unlock[3]; - unsigned char vcpu_is_preempted[2]; -}; - -static const struct patch_lock patch_data_lock = { - .vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax - -# ifdef CONFIG_X86_64 - .queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi) -# else - .queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax) -# endif -}; -#endif /* CONFIG_PARAVIRT_SPINLOCKS */ - -unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, - unsigned int len) -{ - switch (type) { - -#ifdef CONFIG_PARAVIRT_XXL - PATCH_CASE(irq, save_fl, xxl, insn_buff, len); - PATCH_CASE(irq, irq_enable, xxl, insn_buff, len); - PATCH_CASE(irq, irq_disable, xxl, insn_buff, len); - - PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len); - PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len); - PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len); - - PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len); -#endif - -#ifdef CONFIG_PARAVIRT_SPINLOCKS - case PARAVIRT_PATCH(lock.queued_spin_unlock): - if (pv_is_native_spin_unlock()) - return PATCH(lock, queued_spin_unlock, insn_buff, len); - break; - - case PARAVIRT_PATCH(lock.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) - return PATCH(lock, vcpu_is_preempted, insn_buff, len); - break; -#endif - default: - break; - } - - return paravirt_patch_default(type, insn_buff, addr, len); -} diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index f70dffc2771f..9f592923f956 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -254,7 +255,7 @@ unsigned long long sched_clock(void) bool using_native_sched_clock(void) { - return pv_ops.time.sched_clock == native_sched_clock; + return static_call_query(pv_sched_clock) == native_sched_clock; } #else unsigned long long diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 3b6544111ac9..16bc9130e7a5 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -6,7 +6,7 @@ */ #include -#include +#include /* if you want SMP support, implement these with real spinlocks */ .macro LOCK reg diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 1c5c81c16b06..ce6935690766 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -6,7 +6,7 @@ */ #include -#include +#include .macro read64 reg movl %ebx, %eax diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 2402d4c489d2..db4b4f9197c7 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 77b9b2a3b5c8..57b79c577496 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 1e299ac73c86..1cc9da6e29c7 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include .pushsection .noinstr.text, "ax" diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 41902fe8b859..64801010d312 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -8,7 +8,7 @@ */ #include #include -#include +#include #include #undef memmove diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 0bfd26e4ca9e..9827ae267f96 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index f6fb1d218dcc..6bb74b5c238c 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 33e797b48f40..17503fed2017 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1070,8 +1070,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_pmc = xen_read_pmc, - .iret = xen_iret, - .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, .load_gdt = xen_load_gdt, @@ -1232,8 +1230,8 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; - pv_ops.init.patch = paravirt_patch_default; pv_ops.cpu = xen_cpu_ops; + paravirt_iret = xen_iret; xen_init_irq_ops(); /* diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 91f5b330dcc6..d9c945ee1100 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -379,11 +379,6 @@ void xen_timer_resume(void) } } -static const struct pv_time_ops xen_time_ops __initconst = { - .sched_clock = xen_sched_clock, - .steal_clock = xen_steal_clock, -}; - static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; static u64 xen_clock_value_saved; @@ -525,17 +520,24 @@ static void __init xen_time_init(void) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } -void __init xen_init_time_ops(void) +static void __init xen_init_time_common(void) { xen_sched_clock_offset = xen_clocksource_read(); - pv_ops.time = xen_time_ops; + static_call_update(pv_steal_clock, xen_steal_clock); + paravirt_set_sched_clock(xen_sched_clock); + + x86_platform.calibrate_tsc = xen_tsc_khz; + x86_platform.get_wallclock = xen_get_wallclock; +} + +void __init xen_init_time_ops(void) +{ + xen_init_time_common(); x86_init.timers.timer_init = xen_time_init; x86_init.timers.setup_percpu_clockev = x86_init_noop; x86_cpuinit.setup_percpu_clockev = x86_init_noop; - x86_platform.calibrate_tsc = xen_tsc_khz; - x86_platform.get_wallclock = xen_get_wallclock; /* Dom0 uses the native method to set the hardware RTC. */ if (!xen_initial_domain()) x86_platform.set_wallclock = xen_set_wallclock; @@ -569,13 +571,11 @@ void __init xen_hvm_init_time_ops(void) return; } - xen_sched_clock_offset = xen_clocksource_read(); - pv_ops.time = xen_time_ops; + xen_init_time_common(); + x86_init.timers.setup_percpu_clockev = xen_time_init; x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; - x86_platform.calibrate_tsc = xen_tsc_khz; - x86_platform.get_wallclock = xen_get_wallclock; x86_platform.set_wallclock = xen_set_wallclock; } #endif diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 108edbcbc040..152dd33bb223 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -175,7 +176,7 @@ void __init xen_time_setup_guest(void) xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_runstate_update_flag); - pv_ops.time.steal_clock = xen_steal_clock; + static_call_update(pv_steal_clock, xen_steal_clock); static_key_slow_inc(¶virt_steal_enabled); if (xen_runstate_remote) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 85ecc789f4ff..e01b61ab86b1 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -20,6 +20,7 @@ * static_call(name)(args...); * static_call_cond(name)(args...); * static_call_update(name, func); + * static_call_query(name); * * Usage example: * @@ -91,6 +92,10 @@ * * which will include the required value tests to avoid NULL-pointer * dereferences. + * + * To query which function is currently set to be called, use: + * + * func = static_call_query(name); */ #include @@ -118,6 +123,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool STATIC_CALL_TRAMP_ADDR(name), func); \ }) +#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func)) + #ifdef CONFIG_HAVE_STATIC_CALL_INLINE extern int __init static_call_init(void); @@ -128,16 +135,6 @@ struct static_call_mod { struct static_call_site *sites; }; -struct static_call_key { - void *func; - union { - /* bit 0: 0 = mods, 1 = sites */ - unsigned long type; - struct static_call_mod *mods; - struct static_call_site *sites; - }; -}; - /* For finding the key associated with a trampoline */ struct static_call_tramp_key { s32 tramp; @@ -187,10 +184,6 @@ extern long __static_call_return0(void); static inline int static_call_init(void) { return 0; } -struct static_call_key { - void *func; -}; - #define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ @@ -205,6 +198,7 @@ struct static_call_key { }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call_cond(name) (void)__static_call(name) static inline @@ -243,10 +237,6 @@ static inline long __static_call_return0(void) static inline int static_call_init(void) { return 0; } -struct static_call_key { - void *func; -}; - static inline long __static_call_return0(void) { return 0; diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index ae5662d368b9..5a00b8b2cf9f 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -58,11 +58,25 @@ struct static_call_site { __raw_static_call(name); \ }) +struct static_call_key { + void *func; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; +}; + #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ #define __STATIC_CALL_ADDRESSABLE(name) #define __static_call(name) __raw_static_call(name) +struct static_call_key { + void *func; +}; + #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ #ifdef MODULE @@ -77,6 +91,10 @@ struct static_call_site { #else +struct static_call_key { + void *func; +}; + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index ae5662d368b9..5a00b8b2cf9f 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -58,11 +58,25 @@ struct static_call_site { __raw_static_call(name); \ }) +struct static_call_key { + void *func; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; +}; + #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ #define __STATIC_CALL_ADDRESSABLE(name) #define __static_call(name) __raw_static_call(name) +struct static_call_key { + void *func; +}; + #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ #ifdef MODULE @@ -77,6 +91,10 @@ struct static_call_site { #else +struct static_call_key { + void *func; +}; + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))