linux-stable/arch/x86/include/asm/cmpxchg.h
Uros Bizjak 636d6a8b85 locking/atomic/x86: Introduce arch_sync_try_cmpxchg()
Introduce the arch_sync_try_cmpxchg() macro to improve code using
sync_try_cmpxchg() locking primitive. The new definitions use existing
__raw_try_cmpxchg() macros, but use its own "lock; " prefix.

The new macros improve assembly of the cmpxchg loop in
evtchn_fifo_unmask() from drivers/xen/events/events_fifo.c from:

 57a:	85 c0                	test   %eax,%eax
 57c:	78 52                	js     5d0 <...>
 57e:	89 c1                	mov    %eax,%ecx
 580:	25 ff ff ff af       	and    $0xafffffff,%eax
 585:	c7 04 24 00 00 00 00 	movl   $0x0,(%rsp)
 58c:	81 e1 ff ff ff ef    	and    $0xefffffff,%ecx
 592:	89 4c 24 04          	mov    %ecx,0x4(%rsp)
 596:	89 44 24 08          	mov    %eax,0x8(%rsp)
 59a:	8b 74 24 08          	mov    0x8(%rsp),%esi
 59e:	8b 44 24 04          	mov    0x4(%rsp),%eax
 5a2:	f0 0f b1 32          	lock cmpxchg %esi,(%rdx)
 5a6:	89 04 24             	mov    %eax,(%rsp)
 5a9:	8b 04 24             	mov    (%rsp),%eax
 5ac:	39 c1                	cmp    %eax,%ecx
 5ae:	74 07                	je     5b7 <...>
 5b0:	a9 00 00 00 40       	test   $0x40000000,%eax
 5b5:	75 c3                	jne    57a <...>
 <...>

to:

 578:	a9 00 00 00 40       	test   $0x40000000,%eax
 57d:	74 2b                	je     5aa <...>
 57f:	85 c0                	test   %eax,%eax
 581:	78 40                	js     5c3 <...>
 583:	89 c1                	mov    %eax,%ecx
 585:	25 ff ff ff af       	and    $0xafffffff,%eax
 58a:	81 e1 ff ff ff ef    	and    $0xefffffff,%ecx
 590:	89 4c 24 04          	mov    %ecx,0x4(%rsp)
 594:	89 44 24 08          	mov    %eax,0x8(%rsp)
 598:	8b 4c 24 08          	mov    0x8(%rsp),%ecx
 59c:	8b 44 24 04          	mov    0x4(%rsp),%eax
 5a0:	f0 0f b1 0a          	lock cmpxchg %ecx,(%rdx)
 5a4:	89 44 24 04          	mov    %eax,0x4(%rsp)
 5a8:	75 30                	jne    5da <...>
 <...>
 5da:	8b 44 24 04          	mov    0x4(%rsp),%eax
 5de:	eb 98                	jmp    578 <...>

The new code removes move instructions from 585: 5a6: and 5a9:
and the compare from 5ac:. Additionally, the compiler assumes that
cmpxchg success is more probable and optimizes code flow accordingly.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
2023-10-09 18:14:25 +02:00

248 lines
7.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ASM_X86_CMPXCHG_H
#define ASM_X86_CMPXCHG_H
#include <linux/compiler.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
/*
* Non-existent functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error().
*/
extern void __xchg_wrong_size(void)
__compiletime_error("Bad argument size for xchg");
extern void __cmpxchg_wrong_size(void)
__compiletime_error("Bad argument size for cmpxchg");
extern void __xadd_wrong_size(void)
__compiletime_error("Bad argument size for xadd");
extern void __add_wrong_size(void)
__compiletime_error("Bad argument size for add");
/*
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
* -1 because sizeof will never return -1, thereby making those switch
* case statements guaranteed dead code which the compiler will
* eliminate, and allowing the "missing symbol in the default case" to
* indicate a usage error.
*/
#define __X86_CASE_B 1
#define __X86_CASE_W 2
#define __X86_CASE_L 4
#ifdef CONFIG_64BIT
#define __X86_CASE_Q 8
#else
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
#endif
/*
* An exchange-type operation, which takes a value and a pointer, and
* returns the old value.
*/
#define __xchg_op(ptr, arg, op, lock) \
({ \
__typeof__ (*(ptr)) __ret = (arg); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
asm volatile (lock #op "b %b0, %1\n" \
: "+q" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_W: \
asm volatile (lock #op "w %w0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_L: \
asm volatile (lock #op "l %0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
asm volatile (lock #op "q %q0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
default: \
__ ## op ## _wrong_size(); \
} \
__ret; \
})
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
* Since this is generally used to protect other memory information, we
* use "asm volatile" and "memory" clobbers to prevent gcc from moving
* information around.
*/
#define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define __sync_cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
#define __cmpxchg_local(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "")
#ifdef CONFIG_X86_32
# include <asm/cmpxchg_32.h>
#else
# include <asm/cmpxchg_64.h>
#endif
#define arch_cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define arch_sync_cmpxchg(ptr, old, new) \
__sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define arch_cmpxchg_local(ptr, old, new) \
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
: "memory"); \
break; \
} \
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
if (unlikely(!success)) \
*_old = __old; \
likely(success); \
})
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define __sync_try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ")
#define __try_cmpxchg_local(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), "")
#define arch_try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
#define arch_sync_try_cmpxchg(ptr, pold, new) \
__sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
#define arch_try_cmpxchg_local(ptr, pold, new) \
__try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr)))
/*
* xadd() adds "inc" to "*ptr" and atomically returns the previous
* value of "*ptr".
*
* xadd() is locked when multiple CPUs are online
*/
#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
#endif /* ASM_X86_CMPXCHG_H */