/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_SWAB_H
#define _UAPI_LINUX_SWAB_H

#include <linux/types.h>
#include <linux/stddef.h>
#include <asm/bitsperlong.h>
#include <asm/swab.h>

/*
 * casts are necessary for constants, because we never know for sure
 * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
 */
#define ___constant_swab16(x) ((__u16)(				\
	(((__u16)(x) & (__u16)0x00ffU) << 8) |			\
	(((__u16)(x) & (__u16)0xff00U) >> 8)))

#define ___constant_swab32(x) ((__u32)(				\
	(((__u32)(x) & (__u32)0x000000ffUL) << 24) |		\
	(((__u32)(x) & (__u32)0x0000ff00UL) <<  8) |		\
	(((__u32)(x) & (__u32)0x00ff0000UL) >>  8) |		\
	(((__u32)(x) & (__u32)0xff000000UL) >> 24)))

#define ___constant_swab64(x) ((__u64)(				\
	(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) |	\
	(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) |	\
	(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) |	\
	(((__u64)(x) & (__u64)0x00000000ff000000ULL) <<  8) |	\
	(((__u64)(x) & (__u64)0x000000ff00000000ULL) >>  8) |	\
	(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) |	\
	(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) |	\
	(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56)))

#define ___constant_swahw32(x) ((__u32)(			\
	(((__u32)(x) & (__u32)0x0000ffffUL) << 16) |		\
	(((__u32)(x) & (__u32)0xffff0000UL) >> 16)))

#define ___constant_swahb32(x) ((__u32)(			\
	(((__u32)(x) & (__u32)0x00ff00ffUL) << 8) |		\
	(((__u32)(x) & (__u32)0xff00ff00UL) >> 8)))
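
/*
 * For example (illustrative values):
 *	___constant_swab16(0x1234)                 is 0x3412
 *	___constant_swab32(0x12345678)             is 0x78563412
 *	___constant_swab64(0x0123456789abcdefULL)  is 0xefcdab8967452301
 *	___constant_swahw32(0x12345678)            is 0x56781234
 *	___constant_swahb32(0x12345678)            is 0x34127856
 */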

/*
 * Implement the following as inlines, but define the interface using
 * macros to allow constant folding when possible:
 * ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32
 */
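/*
 * A sketch of what that means in practice: __swab32(0x12345678) goes
 * through ___constant_swab32() and folds to 0x78563412 at compile time,
 * while __swab32(x) for a runtime value becomes a call to the inline
 * helper __fswab32(x) (or a compiler bswap builtin, where available).
 */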
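
/*
 * The helpers below are marked __attribute_const__: their result depends
 * only on the argument value, so the compiler may merge repeated calls.
 */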
static inline __attribute_const__ __u16 __fswab16(__u16 val)
{
#if defined(__arch_swab16)
	return __arch_swab16(val);
#else
	return ___constant_swab16(val);
#endif
}

static inline __attribute_const__ __u32 __fswab32(__u32 val)
{
#if defined(__arch_swab32)
	return __arch_swab32(val);
#else
	return ___constant_swab32(val);
#endif
}

static inline __attribute_const__ __u64 __fswab64(__u64 val)
{
#if defined(__arch_swab64)
	return __arch_swab64(val);
#elif defined(__SWAB_64_THRU_32__)
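	/*
	 * No 64-bit arch helper: swap each 32-bit half separately,
	 * then exchange the halves.
	 */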
	__u32 h = val >> 32;
	__u32 l = val & ((1ULL << 32) - 1);

	return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h)));
#else
	return ___constant_swab64(val);
#endif
}

static inline __attribute_const__ __u32 __fswahw32(__u32 val)
{
#ifdef __arch_swahw32
	return __arch_swahw32(val);
#else
	return ___constant_swahw32(val);
#endif
}

static inline __attribute_const__ __u32 __fswahb32(__u32 val)
{
#ifdef __arch_swahb32
	return __arch_swahb32(val);
#else
	return ___constant_swahb32(val);
#endif
}

/**
 * __swab16 - return a byteswapped 16-bit value
 * @x: value to byteswap
 */
#ifdef __HAVE_BUILTIN_BSWAP16__
#define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
#else
#define __swab16(x)				\
	(__u16)(__builtin_constant_p(x) ?	\
	___constant_swab16(x) :			\
	__fswab16(x))
#endif

/**
 * __swab32 - return a byteswapped 32-bit value
 * @x: value to byteswap
 */
#ifdef __HAVE_BUILTIN_BSWAP32__
#define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
#else
#define __swab32(x)				\
	(__u32)(__builtin_constant_p(x) ?	\
	___constant_swab32(x) :			\
	__fswab32(x))
#endif

/**
 * __swab64 - return a byteswapped 64-bit value
 * @x: value to byteswap
 */
#ifdef __HAVE_BUILTIN_BSWAP64__
#define __swab64(x) (__u64)__builtin_bswap64((__u64)(x))
#else
#define __swab64(x)				\
	(__u64)(__builtin_constant_p(x) ?	\
	___constant_swab64(x) :			\
	__fswab64(x))
#endif
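
/**
 * __swab - return a byteswapped long value
 * @y: value to byteswap
 */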
static __always_inline unsigned long __swab(const unsigned long y)
{
#if __BITS_PER_LONG == 64
	return __swab64(y);
#else /* __BITS_PER_LONG == 32 */
	return __swab32(y);
#endif
}

/**
 * __swahw32 - return a word-swapped 32-bit value
 * @x: value to wordswap
 *
 * __swahw32(0x12340000) is 0x00001234
 */
#define __swahw32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swahw32(x) :		\
	__fswahw32(x))

/**
 * __swahb32 - return a high and low byte-swapped 32-bit value
 * @x: value to byteswap
 *
 * __swahb32(0x12345678) is 0x34127856
 */
#define __swahb32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swahb32(x) :		\
	__fswahb32(x))

/**
 * __swab16p - return a byteswapped 16-bit value from a pointer
 * @p: pointer to a naturally-aligned 16-bit value
 */
static __always_inline __u16 __swab16p(const __u16 *p)
{
#ifdef __arch_swab16p
	return __arch_swab16p(p);
#else
	return __swab16(*p);
#endif
}

/**
 * __swab32p - return a byteswapped 32-bit value from a pointer
 * @p: pointer to a naturally-aligned 32-bit value
 */
static __always_inline __u32 __swab32p(const __u32 *p)
{
#ifdef __arch_swab32p
	return __arch_swab32p(p);
#else
	return __swab32(*p);
#endif
}

/**
 * __swab64p - return a byteswapped 64-bit value from a pointer
 * @p: pointer to a naturally-aligned 64-bit value
 */
static __always_inline __u64 __swab64p(const __u64 *p)
{
#ifdef __arch_swab64p
	return __arch_swab64p(p);
#else
	return __swab64(*p);
#endif
}

/**
 * __swahw32p - return a wordswapped 32-bit value from a pointer
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahw32() for details of wordswapping.
 */
static inline __u32 __swahw32p(const __u32 *p)
{
#ifdef __arch_swahw32p
	return __arch_swahw32p(p);
#else
	return __swahw32(*p);
#endif
}

/**
 * __swahb32p - return a high and low byteswapped 32-bit value from a pointer
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahb32() for details of high/low byteswapping.
 */
static inline __u32 __swahb32p(const __u32 *p)
{
#ifdef __arch_swahb32p
	return __arch_swahb32p(p);
#else
	return __swahb32(*p);
#endif
}

/**
 * __swab16s - byteswap a 16-bit value in-place
 * @p: pointer to a naturally-aligned 16-bit value
 */
static inline void __swab16s(__u16 *p)
{
#ifdef __arch_swab16s
	__arch_swab16s(p);
#else
	*p = __swab16p(p);
#endif
}

/**
 * __swab32s - byteswap a 32-bit value in-place
 * @p: pointer to a naturally-aligned 32-bit value
 */
static __always_inline void __swab32s(__u32 *p)
{
#ifdef __arch_swab32s
	__arch_swab32s(p);
#else
	*p = __swab32p(p);
#endif
}

/**
 * __swab64s - byteswap a 64-bit value in-place
 * @p: pointer to a naturally-aligned 64-bit value
 */
static __always_inline void __swab64s(__u64 *p)
{
#ifdef __arch_swab64s
	__arch_swab64s(p);
#else
	*p = __swab64p(p);
#endif
}

/**
 * __swahw32s - wordswap a 32-bit value in-place
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahw32() for details of wordswapping.
 */
static inline void __swahw32s(__u32 *p)
{
#ifdef __arch_swahw32s
	__arch_swahw32s(p);
#else
	*p = __swahw32p(p);
#endif
}

/**
 * __swahb32s - high and low byteswap a 32-bit value in-place
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahb32() for details of high and low byte swapping.
 */
static inline void __swahb32s(__u32 *p)
{
#ifdef __arch_swahb32s
	__arch_swahb32s(p);
#else
	*p = __swahb32p(p);
#endif
}
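
/*
 * Illustrative usage from userspace (a sketch, assuming this exported
 * header is included as <linux/swab.h>):
 *
 *	__u32 v = 0x12345678;
 *	__u16 n = __swab16(0xabcd);	n is 0xcdab
 *
 *	__swab32s(&v);			v now holds 0x78563412
 */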

#endif /* _UAPI_LINUX_SWAB_H */