linux-stable/include/linux/compiler.h
Daniel Borkmann 7829fb09a2 lib: make memzero_explicit more robust against dead store elimination
In commit 0b053c9518 ("lib: memzero_explicit: use barrier instead
of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in
case LTO would decide to inline memzero_explicit() and eventually
find out it could be elimiated as dead store.

While using barrier() works well for the case of gcc, recent efforts
from LLVMLinux people suggest to use llvm as an alternative to gcc,
and there, Stephan found in a simple stand-alone user space example
that llvm could nevertheless optimize and thus elimitate the memset().
A similar issue has been observed in the referenced llvm bug report,
which is regarded as not-a-bug.

Based on some experiments, icc is a bit special on its own, while it
doesn't seem to eliminate the memset(), it could do so with an own
implementation, and then result in similar findings as with llvm.

The fix in this patch now works for all three compilers (also tested
with more aggressive optimization levels). Arguably, in the current
kernel tree it's more of a theoretical issue, but imho, it's better
to be pedantic about it.

It's clearly visible with gcc/llvm though, with the below code: if we
would have used barrier() only here, llvm would have omitted clearing,
not so with barrier_data() variant:

  static inline void memzero_explicit(void *s, size_t count)
  {
    memset(s, 0, count);
    barrier_data(s);
  }

  int main(void)
  {
    char buff[20];
    memzero_explicit(buff, sizeof(buff));
    return 0;
  }

  $ gcc -O2 test.c
  $ gdb a.out
  (gdb) disassemble main
  Dump of assembler code for function main:
   0x0000000000400400  <+0>: lea   -0x28(%rsp),%rax
   0x0000000000400405  <+5>: movq  $0x0,-0x28(%rsp)
   0x000000000040040e <+14>: movq  $0x0,-0x20(%rsp)
   0x0000000000400417 <+23>: movl  $0x0,-0x18(%rsp)
   0x000000000040041f <+31>: xor   %eax,%eax
   0x0000000000400421 <+33>: retq
  End of assembler dump.

  $ clang -O2 test.c
  $ gdb a.out
  (gdb) disassemble main
  Dump of assembler code for function main:
   0x00000000004004f0  <+0>: xorps  %xmm0,%xmm0
   0x00000000004004f3  <+3>: movaps %xmm0,-0x18(%rsp)
   0x00000000004004f8  <+8>: movl   $0x0,-0x8(%rsp)
   0x0000000000400500 <+16>: lea    -0x18(%rsp),%rax
   0x0000000000400505 <+21>: xor    %eax,%eax
   0x0000000000400507 <+23>: retq
  End of assembler dump.

As gcc, clang, but also icc defines __GNUC__, it's sufficient to define
this in compiler-gcc.h only to be picked up. For a fallback or otherwise
unsupported compiler, we define it as a barrier. Similarly, for ecc which
does not support gcc inline asm.

Reference: https://llvm.org/bugs/show_bug.cgi?id=15495
Reported-by: Stephan Mueller <smueller@chronox.de>
Tested-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Stephan Mueller <smueller@chronox.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: mancha security <mancha1@zoho.com>
Cc: Mark Charlebois <charlebm@gmail.com>
Cc: Behan Webster <behanw@converseincode.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-05-04 17:49:51 +08:00

468 lines
14 KiB
C

#ifndef __LINUX_COMPILER_H
#define __LINUX_COMPILER_H
#ifndef __ASSEMBLY__
#ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1)))
# define __kernel __attribute__((address_space(0)))
# define __safe __attribute__((safe))
# define __force __attribute__((force))
# define __nocast __attribute__((nocast))
# define __iomem __attribute__((noderef, address_space(2)))
# define __must_hold(x) __attribute__((context(x,1,1)))
# define __acquires(x) __attribute__((context(x,0,1)))
# define __releases(x) __attribute__((context(x,1,0)))
# define __acquire(x) __context__(x,1)
# define __release(x) __context__(x,-1)
# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
# define __percpu __attribute__((noderef, address_space(3)))
#ifdef CONFIG_SPARSE_RCU_POINTER
# define __rcu __attribute__((noderef, address_space(4)))
#else
# define __rcu
#endif
extern void __chk_user_ptr(const volatile void __user *);
extern void __chk_io_ptr(const volatile void __iomem *);
#else
# define __user
# define __kernel
# define __safe
# define __force
# define __nocast
# define __iomem
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
# define __builtin_warning(x, y...) (1)
# define __must_hold(x)
# define __acquires(x)
# define __releases(x)
# define __acquire(x) (void)0
# define __release(x) (void)0
# define __cond_lock(x,c) (c)
# define __percpu
# define __rcu
#endif
/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
#define ___PASTE(a,b) a##b
#define __PASTE(a,b) ___PASTE(a,b)
#ifdef __KERNEL__
#ifdef __GNUC__
#include <linux/compiler-gcc.h>
#endif
#ifdef CC_USING_HOTPATCH
#define notrace __attribute__((hotpatch(0,0)))
#else
#define notrace __attribute__((no_instrument_function))
#endif
/* Intel compiler defines __GNUC__. So we will overwrite implementations
* coming from above header files here
*/
#ifdef __INTEL_COMPILER
# include <linux/compiler-intel.h>
#endif
/* Clang compiler defines __GNUC__. So we will overwrite implementations
* coming from above header files here
*/
#ifdef __clang__
#include <linux/compiler-clang.h>
#endif
/*
* Generic compiler-dependent macros required for kernel
* build go below this comment. Actual compiler/compiler version
* specific implementations come from the above header files
*/
struct ftrace_branch_data {
const char *func;
const char *file;
unsigned line;
union {
struct {
unsigned long correct;
unsigned long incorrect;
};
struct {
unsigned long miss;
unsigned long hit;
};
unsigned long miss_hit[2];
};
};
/*
* Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
* to disable branch tracing on a per file basis.
*/
#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
&& !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
#define likely_notrace(x) __builtin_expect(!!(x), 1)
#define unlikely_notrace(x) __builtin_expect(!!(x), 0)
#define __branch_check__(x, expect) ({ \
int ______r; \
static struct ftrace_branch_data \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_annotated_branch"))) \
______f = { \
.func = __func__, \
.file = __FILE__, \
.line = __LINE__, \
}; \
______r = likely_notrace(x); \
ftrace_likely_update(&______f, ______r, expect); \
______r; \
})
/*
* Using __builtin_constant_p(x) to ignore cases where the return
* value is always the same. This idea is taken from a similar patch
* written by Daniel Walker.
*/
# ifndef likely
# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
# endif
# ifndef unlikely
# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
# endif
#ifdef CONFIG_PROFILE_ALL_BRANCHES
/*
* "Define 'is'", Bill Clinton
* "Define 'if'", Steven Rostedt
*/
#define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
#define __trace_if(cond) \
if (__builtin_constant_p((cond)) ? !!(cond) : \
({ \
int ______r; \
static struct ftrace_branch_data \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_branch"))) \
______f = { \
.func = __func__, \
.file = __FILE__, \
.line = __LINE__, \
}; \
______r = !!(cond); \
______f.miss_hit[______r]++; \
______r; \
}))
#endif /* CONFIG_PROFILE_ALL_BRANCHES */
#else
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
/* Optimization barrier */
#ifndef barrier
# define barrier() __memory_barrier()
#endif
#ifndef barrier_data
# define barrier_data(ptr) barrier()
#endif
/* Unreachable code */
#ifndef unreachable
# define unreachable() do { } while (1)
#endif
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off) \
({ unsigned long __ptr; \
__ptr = (unsigned long) (ptr); \
(typeof(ptr)) (__ptr + (off)); })
#endif
#ifndef OPTIMIZER_HIDE_VAR
#define OPTIMIZER_HIDE_VAR(var) barrier()
#endif
/* Not-quite-unique ID. */
#ifndef __UNIQUE_ID
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
#endif
#include <uapi/linux/types.h>
static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
default:
barrier();
__builtin_memcpy((void *)res, (const void *)p, size);
barrier();
}
}
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
default:
barrier();
__builtin_memcpy((void *)p, (const void *)res, size);
barrier();
}
}
/*
* Prevent the compiler from merging or refetching reads or writes. The
* compiler is also forbidden from reordering successive instances of
* READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
* compiler is aware of some particular ordering. One way to make the
* compiler aware of ordering is to put the two invocations of READ_ONCE,
* WRITE_ONCE or ACCESS_ONCE() in different C statements.
*
* In contrast to ACCESS_ONCE these two macros will also work on aggregate
* data types like structs or unions. If the size of the accessed data
* type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
* READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
* compile-time warning.
*
* Their two major use cases are: (1) Mediating communication between
* process-level code and irq/NMI handlers, all running on the same CPU,
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
* mutilate accesses that either do not require ordering or that interact
* with an explicit memory barrier or atomic instruction that provides the
* required ordering.
*/
#define READ_ONCE(x) \
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
#define WRITE_ONCE(x, val) \
({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#ifdef __KERNEL__
/*
* Allow us to mark functions as 'deprecated' and have gcc emit a nice
* warning for each use, in hopes of speeding the functions removal.
* Usage is:
* int __deprecated foo(void)
*/
#ifndef __deprecated
# define __deprecated /* unimplemented */
#endif
#ifdef MODULE
#define __deprecated_for_modules __deprecated
#else
#define __deprecated_for_modules
#endif
#ifndef __must_check
#define __must_check
#endif
#ifndef CONFIG_ENABLE_MUST_CHECK
#undef __must_check
#define __must_check
#endif
#ifndef CONFIG_ENABLE_WARN_DEPRECATED
#undef __deprecated
#undef __deprecated_for_modules
#define __deprecated
#define __deprecated_for_modules
#endif
/*
* Allow us to avoid 'defined but not used' warnings on functions and data,
* as well as force them to be emitted to the assembly file.
*
* As of gcc 3.4, static functions that are not marked with attribute((used))
* may be elided from the assembly file. As of gcc 3.4, static data not so
* marked will not be elided, but this may change in a future gcc version.
*
* NOTE: Because distributions shipped with a backported unit-at-a-time
* compiler in gcc 3.3, we must define __used to be __attribute__((used))
* for gcc >=3.3 instead of 3.4.
*
* In prior versions of gcc, such functions and data would be emitted, but
* would be warned about except with attribute((unused)).
*
* Mark functions that are referenced only in inline assembly as __used so
* the code is emitted even though it appears to be unreferenced.
*/
#ifndef __used
# define __used /* unimplemented */
#endif
#ifndef __maybe_unused
# define __maybe_unused /* unimplemented */
#endif
#ifndef __always_unused
# define __always_unused /* unimplemented */
#endif
#ifndef noinline
#define noinline
#endif
/*
* Rather then using noinline to prevent stack consumption, use
* noinline_for_stack instead. For documentation reasons.
*/
#define noinline_for_stack noinline
#ifndef __always_inline
#define __always_inline inline
#endif
#endif /* __KERNEL__ */
/*
* From the GCC manual:
*
* Many functions do not examine any values except their arguments,
* and have no effects except the return value. Basically this is
* just slightly more strict class than the `pure' attribute above,
* since function is not allowed to read global memory.
*
* Note that a function that has pointer arguments and examines the
* data pointed to must _not_ be declared `const'. Likewise, a
* function that calls a non-`const' function usually must not be
* `const'. It does not make sense for a `const' function to return
* `void'.
*/
#ifndef __attribute_const__
# define __attribute_const__ /* unimplemented */
#endif
/*
* Tell gcc if a function is cold. The compiler will assume any path
* directly leading to the call is unlikely.
*/
#ifndef __cold
#define __cold
#endif
/* Simple shorthand for a section definition */
#ifndef __section
# define __section(S) __attribute__ ((__section__(#S)))
#endif
#ifndef __visible
#define __visible
#endif
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif
/* Is this type a native word size -- useful for atomic operations */
#ifndef __native_word
# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
#endif
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
#endif
#ifndef __compiletime_warning
# define __compiletime_warning(message)
#endif
#ifndef __compiletime_error
# define __compiletime_error(message)
/*
* Sparse complains of variable sized arrays due to the temporary variable in
* __compiletime_assert. Unfortunately we can't just expand it out to make
* sparse see a constant array size without breaking compiletime_assert on old
* versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether.
*/
# ifndef __CHECKER__
# define __compiletime_error_fallback(condition) \
do { ((void)sizeof(char[1 - 2 * condition])); } while (0)
# endif
#endif
#ifndef __compiletime_error_fallback
# define __compiletime_error_fallback(condition) do { } while (0)
#endif
#define __compiletime_assert(condition, msg, prefix, suffix) \
do { \
bool __cond = !(condition); \
extern void prefix ## suffix(void) __compiletime_error(msg); \
if (__cond) \
prefix ## suffix(); \
__compiletime_error_fallback(__cond); \
} while (0)
#define _compiletime_assert(condition, msg, prefix, suffix) \
__compiletime_assert(condition, msg, prefix, suffix)
/**
* compiletime_assert - break build and emit msg if condition is false
* @condition: a compile-time constant condition to check
* @msg: a message to emit if condition is false
*
* In tradition of POSIX assert, this macro will break the build if the
* supplied condition is *false*, emitting the supplied error message if the
* compiler has support to do so.
*/
#define compiletime_assert(condition, msg) \
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
#define compiletime_assert_atomic_type(t) \
compiletime_assert(__native_word(t), \
"Need native word sized stores/loads for atomicity.")
/*
* Prevent the compiler from merging or refetching accesses. The compiler
* is also forbidden from reordering successive instances of ACCESS_ONCE(),
* but only when the compiler is aware of some particular ordering. One way
* to make the compiler aware of ordering is to put the two invocations of
* ACCESS_ONCE() in different C statements.
*
* ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE
* on a union member will work as long as the size of the member matches the
* size of the union and the size is smaller than word size.
*
* The major use cases of ACCESS_ONCE used to be (1) Mediating communication
* between process-level code and irq/NMI handlers, all running on the same CPU,
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
* mutilate accesses that either do not require ordering or that interact
* with an explicit memory barrier or atomic instruction that provides the
* required ordering.
*
* If possible use READ_ONCE/ASSIGN_ONCE instead.
*/
#define __ACCESS_ONCE(x) ({ \
__maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
(volatile typeof(x) *)&(x); })
#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
#ifdef CONFIG_KPROBES
# define __kprobes __attribute__((__section__(".kprobes.text")))
# define nokprobe_inline __always_inline
#else
# define __kprobes
# define nokprobe_inline inline
#endif
#endif /* __LINUX_COMPILER_H */