powerpc/pseries: Use jump labels for hcall tracepoints

hcall tracepoints add quite a few instructions to our hcall path:

plpar_hcall:
	mr      r2,r2
	mfcr    r0
	stw     r0,8(r1)
	b       164		<---- start
	ld      r12,0(r2)
	std     r12,32(r1)
	cmpdi   r12,0
	beq     164		<---- end
...

We have an unconditional branch that gets nopped out during boot and
a load/compare/branch. We also store the tracepoint value to the
stack for the hcall_exit path to use.

By using jump labels we can simplify this to just a single nop that
gets replaced with a branch when the tracepoint is enabled:

plpar_hcall:
	mr      r2,r2
	mfcr    r0
	stw     r0,8(r1)
	nop			<----
...

If jump labels are not enabled, we fall back to the old method.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Anton Blanchard 2014-07-03 15:52:03 +10:00 committed by Benjamin Herrenschmidt
parent 8fa5d4547e
commit cc1adb5f32
3 changed files with 142 additions and 41 deletions

View file

@ -10,6 +10,7 @@
* 2 of the License, or (at your option) any later version.
*/
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/feature-fixups.h>
@ -42,4 +43,12 @@ struct jump_entry {
jump_label_t key;
};
#else
/*
 * Assembly-side static branch site: emits a single nop at local label
 * 1098 and records an entry in the __jump_table section (built with
 * FTR_ENTRY_LONG) from the nop's address, the target LABEL and the KEY.
 * NOTE(review): presumably the jump label code rewrites this nop into a
 * branch to LABEL when KEY is enabled - confirm that the entry layout
 * matches the C-side struct jump_entry.
 */
#define ARCH_STATIC_BRANCH(LABEL, KEY) \
1098: nop; \
.pushsection __jump_table, "aw"; \
FTR_ENTRY_LONG 1098b, LABEL, KEY; \
.popsection
#endif
#endif /* _ASM_POWERPC_JUMP_LABEL_H */

View file

@ -12,9 +12,13 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/jump_label.h>
.section ".text"
#ifdef CONFIG_TRACEPOINTS
#ifndef CONFIG_JUMP_LABEL
.section ".toc","aw"
.globl hcall_tracepoint_refcount
@ -22,21 +26,13 @@ hcall_tracepoint_refcount:
.llong 0
.section ".text"
#endif
/*
* precall must preserve all registers. use unused STK_PARAM()
* areas to save snapshots and opcode. We branch around this
* in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature.
* areas to save snapshots and opcode.
*/
#define HCALL_INST_PRECALL(FIRST_REG) \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,hcall_tracepoint_refcount@toc(r2); \
std r12,32(r1); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
std r3,STK_PARAM(R3)(r1); \
std r4,STK_PARAM(R4)(r1); \
@ -60,22 +56,13 @@ END_FTR_SECTION(0, 1); \
ld r8,STK_PARAM(R8)(r1); \
ld r9,STK_PARAM(R9)(r1); \
ld r10,STK_PARAM(R10)(r1); \
mtlr r0; \
1:
mtlr r0
/*
* postcall is performed immediately before function return which
* allows liberal use of volatile registers. We branch around this
* in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature.
* allows liberal use of volatile registers.
*/
#define __HCALL_INST_POSTCALL \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,32(r1); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
ld r6,STK_PARAM(R3)(r1); \
std r3,STK_PARAM(R3)(r1); \
@ -87,8 +74,7 @@ END_FTR_SECTION(0, 1); \
addi r1,r1,STACK_FRAME_OVERHEAD; \
ld r0,16(r1); \
ld r3,STK_PARAM(R3)(r1); \
mtlr r0; \
1:
mtlr r0
#define HCALL_INST_POSTCALL_NORETS \
li r5,0; \
@ -98,37 +84,62 @@ END_FTR_SECTION(0, 1); \
mr r5,BUFREG; \
__HCALL_INST_POSTCALL
#ifdef CONFIG_JUMP_LABEL
/*
 * With jump labels the tracepoint check expands to ARCH_STATIC_BRANCH,
 * keyed on hcall_tracepoint_key, with LABEL as the recorded target.
 */
#define HCALL_BRANCH(LABEL) \
ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
#else
/*
 * Fallback when jump labels are not available: load
 * hcall_tracepoint_refcount from the TOC, save it at 32(r1) and branch
 * to LABEL if it is non-zero.
 *
 * We branch around this in early init (eg when populating the MMU
 * hashtable) by using an unconditional cpu feature.
 */
#define HCALL_BRANCH(LABEL) \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,hcall_tracepoint_refcount@toc(r2); \
std r12,32(r1); \
cmpdi r12,0; \
bne- LABEL; \
1:
#endif
#else
#define HCALL_INST_PRECALL(FIRST_ARG)
#define HCALL_INST_POSTCALL_NORETS
#define HCALL_INST_POSTCALL(BUFREG)
#define HCALL_BRANCH(LABEL)
#endif
.text
_GLOBAL_TOC(plpar_hcall_norets)
HMT_MEDIUM
mfcr r0
stw r0,8(r1)
HCALL_INST_PRECALL(R4)
HCALL_BRANCH(plpar_hcall_norets_trace)
HVSC /* invoke the hypervisor */
HCALL_INST_POSTCALL_NORETS
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
#ifdef CONFIG_TRACEPOINTS
plpar_hcall_norets_trace:
HCALL_INST_PRECALL(R4)
HVSC
HCALL_INST_POSTCALL_NORETS
lwz r0,8(r1)
mtcrf 0xff,r0
blr
#endif
_GLOBAL_TOC(plpar_hcall)
HMT_MEDIUM
mfcr r0
stw r0,8(r1)
HCALL_INST_PRECALL(R5)
HCALL_BRANCH(plpar_hcall_trace)
std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
@ -147,12 +158,40 @@ _GLOBAL_TOC(plpar_hcall)
std r6, 16(r12)
std r7, 24(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
#ifdef CONFIG_TRACEPOINTS
plpar_hcall_trace:
HCALL_INST_PRECALL(R5)
std r4,STK_PARAM(R4)(r1)
mr r0,r4
mr r4,r5
mr r5,r6
mr r6,r7
mr r7,r8
mr r8,r9
mr r9,r10
HVSC
ld r12,STK_PARAM(R4)(r1)
std r4,0(r12)
std r5,8(r12)
std r6,16(r12)
std r7,24(r12)
HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
blr
#endif
/*
* plpar_hcall_raw can be called in real mode. kexec/kdump need some
@ -194,7 +233,7 @@ _GLOBAL_TOC(plpar_hcall9)
mfcr r0
stw r0,8(r1)
HCALL_INST_PRECALL(R5)
HCALL_BRANCH(plpar_hcall9_trace)
std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
@ -222,12 +261,49 @@ _GLOBAL_TOC(plpar_hcall9)
std r11,56(r12)
std r0, 64(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
#ifdef CONFIG_TRACEPOINTS
plpar_hcall9_trace:
HCALL_INST_PRECALL(R5)
std r4,STK_PARAM(R4)(r1)
mr r0,r4
mr r4,r5
mr r5,r6
mr r6,r7
mr r7,r8
mr r8,r9
mr r9,r10
ld r10,STK_PARAM(R11)(r1)
ld r11,STK_PARAM(R12)(r1)
ld r12,STK_PARAM(R13)(r1)
HVSC
mr r0,r12
ld r12,STK_PARAM(R4)(r1)
std r4,0(r12)
std r5,8(r12)
std r6,16(r12)
std r7,24(r12)
std r8,32(r12)
std r9,40(r12)
std r10,48(r12)
std r11,56(r12)
std r0,64(r12)
HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
blr
#endif
/* See plpar_hcall_raw to see why this is needed */
_GLOBAL(plpar_hcall9_raw)

View file

@ -26,6 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
#include <linux/static_key.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@ -649,6 +650,19 @@ EXPORT_SYMBOL(arch_free_page);
#endif
#ifdef CONFIG_TRACEPOINTS
#ifdef CONFIG_JUMP_LABEL
struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
/*
 * Jump label variant: calls static_key_slow_inc() on
 * hcall_tracepoint_key.
 * NOTE(review): presumably this is what switches the HCALL_BRANCH sites
 * from a nop to a branch into the tracing path - confirm.
 */
void hcall_tracepoint_regfunc(void)
{
static_key_slow_inc(&hcall_tracepoint_key);
}
/*
 * Jump label variant: calls static_key_slow_dec() on
 * hcall_tracepoint_key, the counterpart of the static_key_slow_inc()
 * done in hcall_tracepoint_regfunc().
 */
void hcall_tracepoint_unregfunc(void)
{
static_key_slow_dec(&hcall_tracepoint_key);
}
#else
/*
* We optimise our hcall path by placing hcall_tracepoint_refcount
* directly in the TOC so we can check if the hcall tracepoints are
@ -658,13 +672,6 @@ EXPORT_SYMBOL(arch_free_page);
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;
/*
* Since the tracing code might execute hcalls we need to guard against
* recursion. One example of this are spinlocks calling H_YIELD on
* shared processor partitions.
*/
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
void hcall_tracepoint_regfunc(void)
{
hcall_tracepoint_refcount++;
@ -674,6 +681,15 @@ void hcall_tracepoint_unregfunc(void)
{
hcall_tracepoint_refcount--;
}
#endif
/*
* Since the tracing code might execute hcalls we need to guard against
* recursion. One example of this are spinlocks calling H_YIELD on
* shared processor partitions.
*/
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{