linux-stable/include/linux/compiler-gcc.h
Josh Poimboeuf 3193c0836f bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()
On x86-64, with CONFIG_RETPOLINE=n, GCC's "global common subexpression
elimination" optimization results in ___bpf_prog_run()'s jumptable code
changing from this:

	select_insn:
		jmp *jumptable(, %rax, 8)
		...
	ALU64_ADD_X:
		...
		jmp *jumptable(, %rax, 8)
	ALU_ADD_X:
		...
		jmp *jumptable(, %rax, 8)

to this:

	select_insn:
		mov jumptable, %r12
		jmp *(%r12, %rax, 8)
		...
	ALU64_ADD_X:
		...
		jmp *(%r12, %rax, 8)
	ALU_ADD_X:
		...
		jmp *(%r12, %rax, 8)

The jumptable address is placed in a register once, at the beginning of
the function.  The function execution can then go through multiple
indirect jumps which rely on that same register value.  This has a few
issues:

1) Objtool isn't smart enough to be able to track such a register value
   across multiple recursive indirect jumps through the jump table.

2) With CONFIG_RETPOLINE enabled, this optimization actually results in
   a small slowdown.  I measured a ~4.7% slowdown in the test_bpf
   "tcpdump port 22" selftest.

   This slowdown is actually predicted by the GCC manual:

     Note: When compiling a program using computed gotos, a GCC
     extension, you may get better run-time performance if you
     disable the global common subexpression elimination pass by
     adding -fno-gcse to the command line.

So just disable the optimization for this function.

Fixes: e55a73251d ("bpf: Fix ORC unwinding in non-JIT BPF code")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/30c3ca29ba037afcbd860a8672eef0021addf9fe.1563413318.git.jpoimboe@redhat.com
2019-07-18 21:01:06 +02:00

174 lines
5.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_COMPILER_TYPES_H
#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
#endif
/*
* Common definitions for all gcc versions go here.
*/
#define GCC_VERSION (__GNUC__ * 10000 \
+ __GNUC_MINOR__ * 100 \
+ __GNUC_PATCHLEVEL__)
#if GCC_VERSION < 40600
# error Sorry, your compiler is too old - please upgrade it.
#endif
/* Optimization barrier */
/* The "volatile" is due to gcc bugs */
#define barrier() __asm__ __volatile__("": : :"memory")
/*
* This version is i.e. to prevent dead stores elimination on @ptr
* where gcc and llvm may behave differently when otherwise using
* normal barrier(): while gcc behavior gets along with a normal
* barrier(), llvm needs an explicit input variable to be assumed
* clobbered. The issue is as follows: while the inline asm might
* access any memory it wants, the compiler could have fit all of
* @ptr into memory registers instead, and since @ptr never escaped
* from that, it proved that the inline asm wasn't touching any of
* it. This version works well with both compilers, i.e. we're telling
* the compiler that the inline asm absolutely may see the contents
* of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
*/
#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
/*
* This macro obfuscates arithmetic on a variable address so that gcc
* shouldn't recognize the original var, and make assumptions about it.
*
* This is needed because the C standard makes it undefined to do
* pointer arithmetic on "objects" outside their boundaries and the
* gcc optimizers assume this is the case. In particular they
* assume such arithmetic does not wrap.
*
* A miscompilation has been observed because of this on PPC.
* To work around it we hide the relationship of the pointer and the object
* using this macro.
*
* Versions of the ppc64 compiler before 4.1 had a bug where use of
* RELOC_HIDE could trash r30. The bug can be worked around by changing
* the inline assembly constraint from =g to =r, in this particular
* case either is valid.
*/
#define RELOC_HIDE(ptr, off) \
({ \
unsigned long __ptr; \
__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
(typeof(ptr)) (__ptr + (off)); \
})
/*
* A trick to suppress uninitialized variable warning without generating any
* code
*/
#define uninitialized_var(x) x = x
#ifdef CONFIG_RETPOLINE
#define __noretpoline __attribute__((__indirect_branch__("keep")))
#endif
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
#define __compiletime_warning(message) __attribute__((__warning__(message)))
#define __compiletime_error(message) __attribute__((__error__(message)))
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
#define __latent_entropy __attribute__((latent_entropy))
#endif
/*
* calling noreturn functions, __builtin_unreachable() and __builtin_trap()
* confuse the stack allocation in gcc, leading to overly large stack
* frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
*
* Adding an empty inline assembly before it works around the problem
*/
#define barrier_before_unreachable() asm volatile("")
/*
* Mark a position in code as unreachable. This can be used to
* suppress control flow warnings after asm blocks that transfer
* control elsewhere.
*/
#define unreachable() \
do { \
annotate_unreachable(); \
barrier_before_unreachable(); \
__builtin_unreachable(); \
} while (0)
#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
#define __randomize_layout __attribute__((randomize_layout))
#define __no_randomize_layout __attribute__((no_randomize_layout))
/* This anon struct can add padding, so only enable it under randstruct. */
#define randomized_struct_fields_start struct {
#define randomized_struct_fields_end } __randomize_layout;
#endif
/*
* GCC 'asm goto' miscompiles certain code sequences:
*
* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
*
* Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
*
* (asm goto is automatically volatile - the naming reflects this.)
*/
#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
/*
* sparse (__CHECKER__) pretends to be gcc, but can't do constant
* folding in __builtin_bswap*() (yet), so don't set these for it.
*/
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
#if GCC_VERSION >= 40800
#define __HAVE_BUILTIN_BSWAP16__
#endif
#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */
#if GCC_VERSION >= 70000
#define KASAN_ABI_VERSION 5
#elif GCC_VERSION >= 50000
#define KASAN_ABI_VERSION 4
#elif GCC_VERSION >= 40902
#define KASAN_ABI_VERSION 3
#endif
#if __has_attribute(__no_sanitize_address__)
#define __no_sanitize_address __attribute__((no_sanitize_address))
#else
#define __no_sanitize_address
#endif
#if GCC_VERSION >= 50100
#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
#endif
/*
* Turn individual warnings and errors on and off locally, depending
* on version.
*/
#define __diag_GCC(version, severity, s) \
__diag_GCC_ ## version(__diag_GCC_ ## severity s)
/* Severity used in pragma directives */
#define __diag_GCC_ignore ignored
#define __diag_GCC_warn warning
#define __diag_GCC_error error
#define __diag_str1(s) #s
#define __diag_str(s) __diag_str1(s)
#define __diag(s) _Pragma(__diag_str(GCC diagnostic s))
#if GCC_VERSION >= 80000
#define __diag_GCC_8(s) __diag(s)
#else
#define __diag_GCC_8(s)
#endif
#define __no_fgcse __attribute__((optimize("-fno-gcse")))