linux-stable/arch/x86/lib/atomic64_386_32.S

/*
 * atomic64_t for 386/486
 *
 * Copyright © 2010  Luca Barbieri
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>

/* if you want SMP support, implement these with real spinlocks */
/*
 * LOCK - enter the critical section of an atomic64 operation.
 *
 * Saves EFLAGS on the stack and then disables interrupts with cli.
 * The CFI annotation accounts for the 4 bytes that pushfl puts on the
 * stack. The reg argument is accepted but not used by this implementation.
 */
.macro LOCK reg
	pushfl
	CFI_ADJUST_CFA_OFFSET 4
	cli
.endm

/*
 * UNLOCK - leave the critical section entered with LOCK.
 *
 * Pops the EFLAGS word that LOCK pushed, which puts the interrupt flag
 * back to whatever it was before LOCK ran, and reverts the 4-byte CFA
 * adjustment. The reg argument is accepted but not used.
 */
.macro UNLOCK reg
	popfl
	CFI_ADJUST_CFA_OFFSET -4
.endm

/*
 * BEGIN func reg - open the function atomic64_<func>_386.
 *
 * Binds the assembler symbol $v to the register given as reg, so that a
 * function body can address the 64-bit value as ($v) for the low 32 bits
 * and 4($v) for the high 32 bits. It then emits the entry point, starts
 * the CFI region and takes the lock.
 *
 * Three helper macros are defined for the body and exist only until the
 * function is closed:
 *
 *   RETURN - UNLOCK followed by ret; may be placed anywhere in the body
 *   END_   - ends the CFI region and the function symbol, then purges
 *            RETURN, END_ and END so that the next BEGIN can define them
 *            again
 *   END    - RETURN followed by END_, for bodies that fall through to a
 *            single exit at the bottom
 */
.macro BEGIN func reg
$v = \reg

ENTRY(atomic64_\func\()_386)
	CFI_STARTPROC
	LOCK $v

/* unlock and return to the caller */
.macro RETURN
	UNLOCK $v
	ret
.endm

/* close the function without emitting a return sequence */
.macro END_
	CFI_ENDPROC
ENDPROC(atomic64_\func\()_386)
.purgem RETURN
.purgem END_
.purgem END
.endm

/* emit the return sequence, then close the function */
.macro END
RETURN
END_
.endm
.endm

/*
 * atomic64_read_386 - load the 64-bit value addressed by %ecx.
 *
 * Leaves the low 32 bits in %eax and the high 32 bits in %edx.
 * Both loads happen between LOCK and UNLOCK.
 */
BEGIN read %ecx
	movl  ($v), %eax
	movl 4($v), %edx
END

/*
 * atomic64_set_386 - store a 64-bit value at the address in %esi.
 *
 * %ebx supplies the low 32 bits and %ecx the high 32 bits.
 * Both stores happen between LOCK and UNLOCK.
 */
BEGIN set %esi
	movl %ebx,  ($v)
	movl %ecx, 4($v)
END

/*
 * atomic64_xchg_386 - exchange the 64-bit value at the address in %esi.
 *
 * The previous contents are left in %edx:%eax (high:low); the new
 * contents are taken from %ecx:%ebx (high:low).
 */
BEGIN xchg %esi
	/* fetch the old value first ... */
	movl  ($v), %eax
	movl 4($v), %edx
	/* ... then overwrite it with the new one */
	movl %ebx,  ($v)
	movl %ecx, 4($v)
END

/*
 * atomic64_add_386 - add %edx:%eax to the 64-bit value addressed by %ecx.
 *
 * The addition is done directly in memory; the carry out of the low half
 * is propagated into the high half by adcl. %eax and %edx are not modified.
 */
BEGIN add %ecx
	addl %eax,  ($v)
	adcl %edx, 4($v)
END

/*
 * atomic64_add_return_386 - add %edx:%eax to the 64-bit value addressed
 * by %ecx and leave the sum in %edx:%eax.
 */
BEGIN add_return %ecx
	/* build the sum in the registers so it is available as the result */
	addl  ($v), %eax
	adcl 4($v), %edx
	/* write the sum back; movl leaves the flags alone */
	movl %eax,  ($v)
	movl %edx, 4($v)
END

/*
 * atomic64_sub_386 - subtract %edx:%eax from the 64-bit value addressed
 * by %ecx.
 *
 * The subtraction is done directly in memory; the borrow out of the low
 * half is propagated into the high half by sbbl. %eax and %edx are not
 * modified.
 */
BEGIN sub %ecx
	subl %eax,  ($v)
	sbbl %edx, 4($v)
END

/*
 * atomic64_sub_return_386 - subtract %edx:%eax from the 64-bit value
 * addressed by %ecx and leave the difference in %edx:%eax.
 *
 * The operand is negated as a 64-bit quantity first, after which the
 * operation is an ordinary add-and-store.
 */
BEGIN sub_return %ecx
	/*
	 * 64-bit negate of %edx:%eax: negate both halves, then take one more
	 * off the high half when the low half was non-zero (negl sets CF in
	 * exactly that case).
	 */
	negl %edx
	negl %eax
	sbbl $0, %edx
	/* value + (-operand), built in the registers */
	addl  ($v), %eax
	adcl 4($v), %edx
	/* write the result back */
	movl %eax,  ($v)
	movl %edx, 4($v)
END

/*
 * atomic64_inc_386 - add one to the 64-bit value addressed by %esi.
 *
 * Done directly in memory. addl (unlike incl) updates CF, which adcl then
 * folds into the high half.
 */
BEGIN inc %esi
	addl $1,  ($v)
	adcl $0, 4($v)
END

/*
 * atomic64_inc_return_386 - add one to the 64-bit value addressed by %esi
 * and leave the incremented value in %edx:%eax.
 */
BEGIN inc_return %esi
	/* load the current value */
	movl  ($v), %eax
	movl 4($v), %edx
	/* 64-bit increment; adcl picks up the carry from the low half */
	addl $1, %eax
	adcl $0, %edx
	/* store the new value */
	movl %eax,  ($v)
	movl %edx, 4($v)
END

/*
 * atomic64_dec_386 - subtract one from the 64-bit value addressed by %esi.
 *
 * Done directly in memory. subl (unlike decl) updates CF, which sbbl then
 * takes out of the high half as the borrow.
 */
BEGIN dec %esi
	subl $1,  ($v)
	sbbl $0, 4($v)
END

/*
 * atomic64_dec_return_386 - subtract one from the 64-bit value addressed
 * by %esi and leave the decremented value in %edx:%eax.
 */
BEGIN dec_return %esi
	/* load the current value */
	movl  ($v), %eax
	movl 4($v), %edx
	/* 64-bit decrement; sbbl applies the borrow from the low half */
	subl $1, %eax
	sbbl $0, %edx
	/* store the new value */
	movl %eax,  ($v)
	movl %edx, 4($v)
END

/*
 * atomic64_add_unless_386 - add a to the 64-bit value unless it equals u.
 *
 * In:  %ecx      = address of the 64-bit value
 *      %edx:%eax = a, the amount to add
 *      %edi:%esi = u, the value for which the add is skipped
 * Out: %eax      = 1 if the add was performed, 0 if it was skipped
 *
 * The stored value is not compared with u directly. Instead u + a is
 * formed in %edi:%esi and value + a in %edx:%eax; the two sums match
 * exactly when value == u, and when they differ the second sum is already
 * the result to store. %esi, %edi and %edx are overwritten.
 */
BEGIN add_unless %ecx
	/* %edi:%esi = u + a */
	addl %eax, %esi
	adcl %edx, %edi
	/* %edx:%eax = value + a */
	addl  ($v), %eax
	adcl 4($v), %edx
	/* differing low halves are enough to know value != u */
	cmpl %eax, %esi
	je .Ladd_unless_low_match
.Ladd_unless_store:
	movl %eax,  ($v)
	movl %edx, 4($v)
	movl $1, %eax
.Ladd_unless_exit:
RETURN
.Ladd_unless_low_match:
	/* low halves agree, so the high halves decide */
	cmpl %edx, %edi
	jne .Ladd_unless_store
	/* value == u: leave memory alone and report 0 */
	xorl %eax, %eax
	jmp .Ladd_unless_exit
END_

/*
 * atomic64_inc_not_zero_386 - add one to the 64-bit value unless it is zero.
 *
 * In:  %esi = address of the 64-bit value
 * Out: %eax = 1 if the value was non-zero and has been incremented,
 *             0 if the value was zero and has been left untouched
 *
 * The result follows the same convention as atomic64_add_unless_386:
 * 1 means the operation was performed. %edx is overwritten.
 */
BEGIN inc_not_zero %esi
	movl  ($v), %eax
	movl 4($v), %edx
	/* a non-zero low half already proves the value is non-zero */
	testl %eax, %eax
	je 3f
1:
	/* 64-bit increment and write-back */
	addl $1, %eax
	adcl $0, %edx
	movl %eax,  ($v)
	movl %edx, 4($v)
	/* report that the increment happened */
	movl $1, %eax
2:
RETURN
3:
	/* low half is zero, so the high half decides */
	testl %edx, %edx
	jne 1b
	/* value is zero: nothing stored, and %eax already holds the result 0 */
	jmp 2b
END_

/*
 * atomic64_dec_if_positive_386 - decrement the 64-bit value unless the
 * result would be negative.
 *
 * In:  %esi      = address of the 64-bit value
 * Out: %edx:%eax = value - 1, whether or not it was stored
 *
 * value - 1 is computed in the registers. It is written back only when
 * its sign bit is clear; otherwise memory is left unchanged.
 */
BEGIN dec_if_positive %esi
	/* %edx:%eax = value - 1 */
	movl  ($v), %eax
	movl 4($v), %edx
	subl $1, %eax
	sbbl $0, %edx
	/* SF comes from the high half, i.e. the sign of the 64-bit result */
	js .Ldec_if_positive_skip_store
	movl %eax,  ($v)
	movl %edx, 4($v)
.Ldec_if_positive_skip_store:
END
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 09:54:25 +00:00			`/*`
			`* atomic64_t for 386/486`
			`*`
			`* Copyright © 2010 Luca Barbieri`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*/`

			`#include <linux/linkage.h>`
			`#include <asm/alternative-asm.h>`
			`#include <asm/dwarf2.h>`

			`/* if you want SMP support, implement these with real spinlocks */`
			`.macro LOCK reg`
			`pushfl`
			`CFI_ADJUST_CFA_OFFSET 4`
			`cli`
			`.endm`

			`.macro UNLOCK reg`
			`popfl`
			`CFI_ADJUST_CFA_OFFSET -4`
			`.endm`

			`.macro BEGIN func reg`
			`$v = \reg`

			`ENTRY(atomic64_\func\()_386)`
			`CFI_STARTPROC`
			`LOCK $v`

			`.macro RETURN`
			`UNLOCK $v`
			`ret`
			`.endm`

			`.macro END_`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_\func\()_386)`
			`.purgem RETURN`
			`.purgem END_`
			`.purgem END`
			`.endm`

			`.macro END`
			`RETURN`
			`END_`
			`.endm`
			`.endm`

			`BEGIN read %ecx`
			`movl ($v), %eax`
			`movl 4($v), %edx`
			`END`

			`BEGIN set %esi`
			`movl %ebx, ($v)`
			`movl %ecx, 4($v)`
			`END`

			`BEGIN xchg %esi`
			`movl ($v), %eax`
			`movl 4($v), %edx`
			`movl %ebx, ($v)`
			`movl %ecx, 4($v)`
			`END`

			`BEGIN add %ecx`
			`addl %eax, ($v)`
			`adcl %edx, 4($v)`
			`END`

			`BEGIN add_return %ecx`
			`addl ($v), %eax`
			`adcl 4($v), %edx`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
			`END`

			`BEGIN sub %ecx`
			`subl %eax, ($v)`
			`sbbl %edx, 4($v)`
			`END`

			`BEGIN sub_return %ecx`
			`negl %edx`
			`negl %eax`
			`sbbl $0, %edx`
			`addl ($v), %eax`
			`adcl 4($v), %edx`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
			`END`

			`BEGIN inc %esi`
			`addl $1, ($v)`
			`adcl $0, 4($v)`
			`END`

			`BEGIN inc_return %esi`
			`movl ($v), %eax`
			`movl 4($v), %edx`
			`addl $1, %eax`
			`adcl $0, %edx`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
			`END`

			`BEGIN dec %esi`
			`subl $1, ($v)`
			`sbbl $0, 4($v)`
			`END`

			`BEGIN dec_return %esi`
			`movl ($v), %eax`
			`movl 4($v), %edx`
			`subl $1, %eax`
			`sbbl $0, %edx`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
			`END`

			`BEGIN add_unless %ecx`
			`addl %eax, %esi`
			`adcl %edx, %edi`
			`addl ($v), %eax`
			`adcl 4($v), %edx`
			`cmpl %eax, %esi`
			`je 3f`
			`1:`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
x86-32: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it performed the add and 0 otherwise. The implementation did the opposite thing. Reported-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267469749-11878-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-03-01 18:55:46 +00:00			`movl $1, %eax`
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 09:54:25 +00:00			`2:`
			`RETURN`
			`3:`
			`cmpl %edx, %edi`
			`jne 1b`
x86-32: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it performed the add and 0 otherwise. The implementation did the opposite thing. Reported-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267469749-11878-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-03-01 18:55:46 +00:00			`xorl %eax, %eax`
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 09:54:25 +00:00			`jmp 2b`
			`END_`

			`BEGIN inc_not_zero %esi`
			`movl ($v), %eax`
			`movl 4($v), %edx`
			`testl %eax, %eax`
			`je 3f`
			`1:`
			`addl $1, %eax`
			`adcl $0, %edx`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
			`xorl %eax, %eax`
			`2:`
			`RETURN`
			`3:`
			`testl %edx, %edx`
			`jne 1b`
			`movl $1, %eax`
			`jmp 2b`
			`END_`

			`BEGIN dec_if_positive %esi`
			`movl ($v), %eax`
			`movl 4($v), %edx`
			`subl $1, %eax`
			`sbbl $0, %edx`
			`js 1f`
			`movl %eax, ($v)`
			`movl %edx, 4($v)`
			`1:`
			`END`