asm-generic: New generic ticket-based spinlock

This contains a new ticket-based spinlock that uses only generic
 atomics and doesn't require as much from the memory system as qspinlock
 does in order to be fair.  It also includes a bit of documentation about
 the qspinlock and qrwlock fairness requirements.
 
 This will soon be used by a handful of architectures that don't meet the
 qspinlock requirements.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEKzw3R0RoQ7JKlDp6LhMZ81+7GIkFAmJ8BZETHHBhbG1lckBk
 YWJiZWx0LmNvbQAKCRAuExnzX7sYiWC2D/4qA9r9Niv/Vw9/H08+kefmYsVLjoZ7
 n9tbS5+Rj/8TCwVpqQSkJix16XGVP760KT4XmmljJMNjKiHP4Vg8ZsNfewK6gxer
 Dk1MkrTEUk+yzCheyCFramwBmvz+tV1qDSq+/Lgl2jMDwlKRidVW3mGkeh4y+QRF
 Xvc3voW689ZGtnsPNjdAsXRKJrhTsdAXaj57RSiPXKGTJS5Ll+FO6pgNMW7fkAL3
 XnWRVM03WpvNh70RcSV3jfZN2CSTRaw8d44CEOkGtbFTe9qwFkuSqhpTyCyfJ+NL
 0Z3K4ZUypcjgC4lkxXJzvQhe5Vi3S7GFypzMeyAinjNegrXWY7Ke09mYClVPplwO
 kt2GTCmHcCMItZI9G7DLtYkNozlvNtCD0Qb63UptBxzqIedcKtNg+kY2Ovmnbi0A
 PeGN5OiARlpiwtYnJMh3fq5muMakDBm+You8u0tB0eKvBorvElteBwqwOg2zdhka
 iuoLtOtgD/Sx6UWvVeApx+vhlJ9WdOXDD9AZjsgbZDYvk+MX0lj8jvnS8jidDmAr
 j6jQ9qm2Ak7cUtZnz9hQKlDakqzNX8TsS7B91QV5nrJxwGJHCeqry066A4Sxmf4T
 mkNPfUfaBh1eBSaLzX+kaSMyFqNBeBopQNsH72zGKoYCYIJJxoOLBZbKuypJSVyf
 e0DDge2doJSwHg==
 =Ti7k
 -----END PGP SIGNATURE-----

Merge tag 'generic-ticket-spinlocks-v6' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux into asm-generic

asm-generic: New generic ticket-based spinlock

This contains a new ticket-based spinlock that uses only generic
atomics and doesn't require as much from the memory system as qspinlock
does in order to be fair.  It also includes a bit of documentation about
the qspinlock and qrwlock fairness requirements.

This will soon be used by a handful of architectures that don't meet the
qspinlock requirements.

* tag 'generic-ticket-spinlocks-v6' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux:
  csky: Move to generic ticket-spinlock
  RISC-V: Move to queued RW locks
  RISC-V: Move to generic spinlocks
  openrisc: Move to ticket-spinlock
  asm-generic: qrwlock: Document the spinlock fairness requirements
  asm-generic: qspinlock: Indicate the use of mixed-size atomics
  asm-generic: ticket-lock: New generic ticket-based spinlock
This commit is contained in:
Arnd Bergmann 2022-05-11 20:52:52 +02:00
commit 03a679a1a4
15 changed files with 147 additions and 321 deletions

View file

@ -3,7 +3,10 @@ generic-y += asm-offsets.h
generic-y += extable.h
generic-y += gpio.h
generic-y += kvm_para.h
generic-y += spinlock.h
generic-y += spinlock_types.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
generic-y += parport.h
generic-y += user.h
generic-y += vmlinux.lds.h

View file

@ -1,89 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_CSKY_SPINLOCK_H
#define __ASM_CSKY_SPINLOCK_H
#include <linux/spinlock_types.h>
#include <asm/barrier.h>
/*
* Ticket-based spin-locking.
*/
/*
 * arch_spin_lock() - draw a ticket and spin until it is being served.
 * @lock: ticket lock to acquire.
 *
 * Returns only once lock->tickets.owner equals the ticket drawn here.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
arch_spinlock_t lockval;
u32 ticket_next = 1 << TICKET_NEXT;
u32 *p = &lock->lock;
u32 tmp;
/*
 * ldex.w/stex.w retry loop: load the lock word into tmp (%0), keep a copy
 * of it in lockval (%1), add ticket_next (%3) and store the sum back.
 * The loop branches back to 1: while tmp is zero after stex.w, which is
 * presumably how a failed exclusive store is reported.
 */
asm volatile (
"1: ldex.w %0, (%2) \n"
" mov %1, %0 \n"
" add %0, %3 \n"
" stex.w %0, (%2) \n"
" bez %0, 1b \n"
: "=&r" (tmp), "=&r" (lockval)
: "r"(p), "r"(ticket_next)
: "cc");
/* Spin, re-reading only the owner half, until our ticket comes up. */
while (lockval.tickets.next != lockval.tickets.owner)
lockval.tickets.owner = READ_ONCE(lock->tickets.owner);
/* Full barrier once the lock has been observed as ours. */
smp_mb();
}
/*
 * arch_spin_trylock() - try to take the ticket lock without waiting.
 * @lock: ticket lock to try.
 *
 * Returns 1 if the lock was acquired, 0 if it was found to be held.
 */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
u32 tmp, contended, res;
u32 ticket_next = 1 << TICKET_NEXT;
u32 *p = &lock->lock;
/*
 * Each pass loads the lock word into res (%0) and compares it with a
 * copy rotated by 16 bits (%1); the two presumably match only when the
 * owner and next halves are equal, i.e. the lock is free. On a mismatch
 * the code jumps to 1: with contended still 1 and without storing.
 * Otherwise contended is cleared and the word plus ticket_next is stored
 * with stex.w. The C loop repeats while res is zero, which after stex.w
 * presumably signals a failed exclusive store.
 */
do {
asm volatile (
" ldex.w %0, (%3) \n"
" movi %2, 1 \n"
" rotli %1, %0, 16 \n"
" cmpne %1, %0 \n"
" bt 1f \n"
" movi %2, 0 \n"
" add %0, %0, %4 \n"
" stex.w %0, (%3) \n"
"1: \n"
: "=&r" (res), "=&r" (tmp), "=&r" (contended)
: "r"(p), "r"(ticket_next)
: "cc");
} while (!res);
/* The full barrier is only issued when the lock was actually taken. */
if (!contended)
smp_mb();
return !contended;
}
/*
 * arch_spin_unlock() - release the ticket lock.
 * @lock: ticket lock held by the caller.
 *
 * Issues a full barrier and then advances the owner half by one, which
 * hands the lock to the holder of the next ticket.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	u16 now_serving;

	smp_mb();

	/* The increment is narrowed to u16, so owner wraps on its own. */
	now_serving = lock->tickets.owner + 1;
	WRITE_ONCE(lock->tickets.owner, now_serving);
}
/*
 * arch_spin_value_unlocked() - test a by-value copy of a lock word.
 * @lock: snapshot of the lock, passed by value.
 *
 * Returns non-zero when the owner and next tickets in the snapshot agree.
 */
static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	const u16 serving = lock.tickets.owner;
	const u16 newest = lock.tickets.next;

	return serving == newest;
}
/*
 * arch_spin_is_locked() - report whether the lock is currently held.
 * @lock: ticket lock to inspect.
 *
 * Takes one READ_ONCE() snapshot of the whole lock and returns 1 if that
 * snapshot is not in the unlocked state, 0 otherwise.
 */
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	arch_spinlock_t snapshot = READ_ONCE(*lock);

	if (arch_spin_value_unlocked(snapshot))
		return 0;

	return 1;
}
/*
 * arch_spin_is_contended() - report whether anyone is queued behind the holder.
 * @lock: ticket lock to inspect.
 *
 * Returns non-zero when more than one ticket is outstanding in a single
 * READ_ONCE() snapshot of the ticket pair.
 */
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	struct __raw_tickets snap = READ_ONCE(lock->tickets);
	/* Both u16 operands are promoted, so the difference is a plain int. */
	int outstanding = snap.next - snap.owner;

	return outstanding > 1;
}
#define arch_spin_is_contended arch_spin_is_contended
#include <asm/qrwlock.h>
#endif /* __ASM_CSKY_SPINLOCK_H */

View file

@ -1,27 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_CSKY_SPINLOCK_TYPES_H
#define __ASM_CSKY_SPINLOCK_TYPES_H
#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
#define TICKET_NEXT 16
/*
 * Ticket spinlock word. The same 32 bits can be accessed either as one
 * value (lock) or as the two 16-bit ticket counters (tickets).
 */
typedef struct {
union {
/* Whole lock word, as used by the exclusive load/store sequences. */
u32 lock;
struct __raw_tickets {
/* little endian */
/* Ticket currently being served; unlock advances it by one. */
u16 owner;
/* Ticket counter that lockers advance to draw a ticket. */
u16 next;
} tickets;
};
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
#include <asm-generic/qrwlock_types.h>
#endif /* __ASM_CSKY_SPINLOCK_TYPES_H */

View file

@ -30,7 +30,6 @@ config OPENRISC
select HAVE_DEBUG_STACKOVERFLOW
select OR1K_PIC
select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_USE_QUEUED_RWLOCKS
select OMPIC if SMP
select ARCH_WANT_FRAME_POINTERS

View file

@ -1,9 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += qspinlock_types.h
generic-y += qspinlock.h
generic-y += spinlock_types.h
generic-y += spinlock.h
generic-y += qrwlock_types.h
generic-y += qrwlock.h
generic-y += user.h

View file

@ -1,27 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* OpenRISC Linux
*
* Linux architectural port borrowing liberally from similar works of
* others. All original copyrights apply as per the original source
* declaration.
*
* OpenRISC implementation:
* Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
* Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
* et al.
*/
#ifndef __ASM_OPENRISC_SPINLOCK_H
#define __ASM_OPENRISC_SPINLOCK_H
#include <asm/qspinlock.h>
#include <asm/qrwlock.h>
/*
 * Relax hooks for spin, read and write lock waiters. All three expand to
 * cpu_relax() and do not use their lock argument.
 */
#define arch_spin_relax(lock) cpu_relax()
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
#endif

View file

@ -1,7 +0,0 @@
#ifndef _ASM_OPENRISC_SPINLOCK_TYPES_H
#define _ASM_OPENRISC_SPINLOCK_TYPES_H
#include <asm/qspinlock_types.h>
#include <asm/qrwlock_types.h>
#endif /* _ASM_OPENRISC_SPINLOCK_TYPES_H */

View file

@ -39,6 +39,7 @@ config RISCV
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB

View file

@ -3,5 +3,9 @@ generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
generic-y += parport.h
generic-y += spinlock.h
generic-y += spinlock_types.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
generic-y += user.h
generic-y += vmlinux.lds.h

View file

@ -1,135 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2015 Regents of the University of California
* Copyright (C) 2017 SiFive
*/
#ifndef _ASM_RISCV_SPINLOCK_H
#define _ASM_RISCV_SPINLOCK_H
#include <linux/kernel.h>
#include <asm/current.h>
#include <asm/fence.h>
/*
* Simple spin lock operations. These provide no fairness guarantees.
*/
/* FIXME: Replace this with a ticket lock, like MIPS. */
/* True when a single READ_ONCE() sample of the lock word is non-zero. */
#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
/*
 * arch_spin_unlock() - release the lock.
 * @lock: spinlock held by the caller.
 *
 * Writes 0 to the lock word with smp_store_release().
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_store_release(&lock->lock, 0);
}
/*
 * arch_spin_trylock() - make one attempt to take the lock.
 * @lock: spinlock to try.
 *
 * Returns 1 if the lock was acquired, 0 if it was already held.
 */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
int tmp = 1, busy;
/*
 * amoswap.w stores tmp (1) into the lock word and leaves the previous
 * contents in busy; a previous value of 0 means the lock was free.
 * RISCV_ACQUIRE_BARRIER is defined outside this file (asm/fence.h is
 * included above).
 */
__asm__ __volatile__ (
" amoswap.w %0, %2, %1\n"
RISCV_ACQUIRE_BARRIER
: "=r" (busy), "+A" (lock->lock)
: "r" (tmp)
: "memory");
return !busy;
}
/*
 * arch_spin_lock() - spin until the lock has been acquired.
 * @lock: spinlock to acquire.
 *
 * Test-and-test-and-set: the swap in arch_spin_trylock() is only attempted
 * when the lock word has just been read as free.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	for (;;) {
		if (!arch_spin_is_locked(lock) && arch_spin_trylock(lock))
			return;
	}
}
/***********************************************************/
/*
 * arch_read_lock() - acquire the rwlock for reading, spinning as needed.
 * @lock: rwlock to acquire.
 *
 * The lock word is incremented by one for each reader. A negative value
 * (the write-lock paths store -1) makes the reader spin.
 */
static inline void arch_read_lock(arch_rwlock_t *lock)
{
int tmp;
/*
 * lr.w/sc.w loop: reload while the word is negative, otherwise try to
 * store word + 1. sc.w overwrites tmp with its status and the loop
 * restarts from the load while that status is non-zero.
 */
__asm__ __volatile__(
"1: lr.w %1, %0\n"
" bltz %1, 1b\n"
" addi %1, %1, 1\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
: "+A" (lock->lock), "=&r" (tmp)
:: "memory");
}
/*
 * arch_write_lock() - acquire the rwlock for writing, spinning as needed.
 * @lock: rwlock to acquire.
 *
 * Waits for the lock word to read as zero and then replaces it with -1.
 */
static inline void arch_write_lock(arch_rwlock_t *lock)
{
int tmp;
/*
 * lr.w/sc.w loop: reload while the word is non-zero, otherwise try to
 * store -1. sc.w overwrites tmp with its status and the loop restarts
 * from the load while that status is non-zero.
 */
__asm__ __volatile__(
"1: lr.w %1, %0\n"
" bnez %1, 1b\n"
" li %1, -1\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
: "+A" (lock->lock), "=&r" (tmp)
:: "memory");
}
/*
 * arch_read_trylock() - try to acquire the rwlock for reading.
 * @lock: rwlock to try.
 *
 * Returns 1 if a reader reference was taken, 0 if the lock word was
 * negative (the write-lock paths store -1).
 */
static inline int arch_read_trylock(arch_rwlock_t *lock)
{
int busy;
/*
 * A negative word jumps forward to the final 1: label with busy still
 * holding that non-zero value, so the function returns 0. Otherwise
 * word + 1 is stored with sc.w, retrying from the load while the sc.w
 * status is non-zero; on success busy is 0 and the acquire barrier is
 * executed.
 */
__asm__ __volatile__(
"1: lr.w %1, %0\n"
" bltz %1, 1f\n"
" addi %1, %1, 1\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
"1:\n"
: "+A" (lock->lock), "=&r" (busy)
:: "memory");
return !busy;
}
/*
 * arch_write_trylock() - try to acquire the rwlock for writing.
 * @lock: rwlock to try.
 *
 * Returns 1 if the lock word was zero and has been replaced with -1,
 * 0 if the lock word was non-zero.
 */
static inline int arch_write_trylock(arch_rwlock_t *lock)
{
int busy;
/*
 * A non-zero word jumps forward to the final 1: label with busy still
 * holding that value, so the function returns 0. Otherwise -1 is stored
 * with sc.w, retrying from the load while the sc.w status is non-zero;
 * on success busy is 0 and the acquire barrier is executed.
 */
__asm__ __volatile__(
"1: lr.w %1, %0\n"
" bnez %1, 1f\n"
" li %1, -1\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
"1:\n"
: "+A" (lock->lock), "=&r" (busy)
:: "memory");
return !busy;
}
/*
 * arch_read_unlock() - drop one reader reference.
 * @lock: rwlock held for reading by the caller.
 *
 * After RISCV_RELEASE_BARRIER, amoadd.w atomically adds -1 to the lock
 * word; the destination register is x0, so the old value is discarded.
 */
static inline void arch_read_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__(
RISCV_RELEASE_BARRIER
" amoadd.w x0, %1, %0\n"
: "+A" (lock->lock)
: "r" (-1)
: "memory");
}
/*
 * arch_write_unlock() - release a write-held rwlock.
 * @lock: rwlock held for writing by the caller.
 *
 * Writes 0 to the lock word with smp_store_release().
 */
static inline void arch_write_unlock(arch_rwlock_t *lock)
{
smp_store_release(&lock->lock, 0);
}
#endif /* _ASM_RISCV_SPINLOCK_H */

View file

@ -1,25 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2015 Regents of the University of California
*/
#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
#define _ASM_RISCV_SPINLOCK_TYPES_H
#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
/*
 * Spinlock word: 0 when free; the lock path in asm/spinlock.h swaps in 1
 * to take it.
 */
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
/* Static initializer for an unlocked spinlock. */
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
/*
 * Read/write lock word: 0 when free. The lock paths in asm/spinlock.h add
 * one per reader and store -1 for a writer.
 */
typedef struct {
volatile unsigned int lock;
} arch_rwlock_t;
/* Static initializer for an unlocked rwlock. */
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */

View file

@ -2,6 +2,10 @@
/*
* Queue read/write lock
*
* These use generic atomic and locking routines, but depend on a fair spinlock
* implementation in order to be fair themselves. The implementation in
* asm-generic/spinlock.h meets these requirements.
*
* (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
*
* Authors: Waiman Long <waiman.long@hp.com>

View file

@ -2,6 +2,35 @@
/*
* Queued spinlock
*
* A 'generic' spinlock implementation that is based on MCS locks. For an
* architecture that's looking for a 'generic' spinlock, please first consider
* ticket-lock.h and only come looking here when you've considered all the
* constraints below and can show your hardware does actually perform better
* with qspinlock.
*
* qspinlock relies on atomic_*_release()/atomic_*_acquire() to be RCsc (or no
* weaker than RCtso if you're power), where regular code only expects atomic_t
* to be RCpc.
*
* qspinlock relies on a far greater (compared to asm-generic/spinlock.h) set
* of atomic operations to behave well together, please audit them carefully to
* ensure they all have forward progress. Many atomic operations may default to
* cmpxchg() loops which will not have good forward progress properties on
* LL/SC architectures.
*
* One notable example is atomic_fetch_or_acquire(), which x86 cannot (cheaply)
* do. Carefully read the patches that introduced
* queued_fetch_set_pending_acquire().
*
* qspinlock also relies heavily on mixed-size atomic operations. Specifically,
* it requires architectures to have xchg16, something which many LL/SC
* architectures need to implement as a 32-bit and+or in order to satisfy the
* forward progress guarantees mentioned above.
*
* Further reading on mixed size atomics that might be relevant:
*
* http://www.cl.cam.ac.uk/~pes20/popl17/mixed-size.pdf
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*

View file

@ -1,12 +1,92 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* 'Generic' ticket-lock implementation.
*
* It relies on atomic_fetch_add() having well defined forward progress
* guarantees under contention. If your architecture cannot provide this, stick
* to a test-and-set lock.
*
* It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
* sub-word of the value. This is generally true for anything LL/SC although
* you'd be hard pressed to find anything useful in architecture specifications
* about this. If your architecture cannot do this you might be better off with
* a test-and-set.
*
* It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
* uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
* a full fence after the spin to upgrade the otherwise-RCpc
* atomic_cond_read_acquire().
*
* The implementation uses smp_cond_load_acquire() to spin, so if the
* architecture has WFE like instructions to sleep instead of poll for word
* modifications be sure to implement that (see ARM64 for example).
*
*/
#ifndef __ASM_GENERIC_SPINLOCK_H
#define __ASM_GENERIC_SPINLOCK_H
/*
* You need to implement asm/spinlock.h for SMP support. The generic
* version does not handle SMP.
*/
#ifdef CONFIG_SMP
#error need an architecture specific asm/spinlock.h
#endif
#include <linux/atomic.h>
#include <asm-generic/spinlock_types.h>
/*
 * arch_spin_lock() - draw a ticket and wait until it is being served.
 * @lock: ticket lock to acquire.
 *
 * The upper 16 bits of the lock word hold the next ticket to hand out and
 * the lower 16 bits hold the ticket currently being served.
 */
static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
/* Bump the next-ticket half; our ticket is its value before the add. */
u32 val = atomic_fetch_add(1<<16, lock);
u16 ticket = val >> 16;
/* Uncontended: our ticket was already the one being served. */
if (ticket == (u16)val)
return;
/*
* atomic_cond_read_acquire() is RCpc, but rather than defining a
* custom cond_read_rcsc() here we just emit a full fence. We only
* need the prior reads before subsequent writes ordering from
* smp_mb(), but as atomic_cond_read_acquire() just emits reads and we
* have no outstanding writes due to the atomic_fetch_add() the extra
* orderings are free.
*/
atomic_cond_read_acquire(lock, ticket == (u16)VAL);
smp_mb();
}
/*
 * arch_spin_trylock() - make one attempt to take the ticket lock.
 * @lock: ticket lock to try.
 *
 * Returns true if the lock was free in the sampled word and the
 * compare-and-exchange that draws the next ticket succeeded.
 */
static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
{
	u32 snapshot = atomic_read(lock);
	u16 next_ticket = snapshot >> 16;
	u16 now_serving = snapshot & 0xffff;

	if (next_ticket == now_serving) {
		/* SC, for RCsc */
		return atomic_try_cmpxchg(lock, &snapshot, snapshot + (1<<16));
	}

	return false;
}
/*
 * arch_spin_unlock() - release the ticket lock.
 * @lock: ticket lock held by the caller.
 *
 * Advances the now-serving ticket (lower 16 bits of the lock word) by one
 * using a 16-bit release store.
 */
static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
/*
 * Address the lock as two u16 halves: index 0 normally, index 1 when
 * CONFIG_CPU_BIG_ENDIAN is enabled. Presumably this selects the
 * low-order half of the word on either byte order.
 */
u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
u32 val = atomic_read(lock);
/* Only 16 bits are written, so the other half of the word is untouched. */
smp_store_release(ptr, (u16)val + 1);
}
/*
 * arch_spin_is_locked() - report whether the lock is currently held.
 * @lock: ticket lock to inspect.
 *
 * Returns non-zero when the two 16-bit halves of one atomic_read() sample
 * of the lock word differ.
 */
static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	u32 snapshot = atomic_read(lock);
	u32 next_ticket = snapshot >> 16;
	u32 now_serving = snapshot & 0xffff;

	return next_ticket != now_serving;
}
/*
 * arch_spin_is_contended() - report whether anyone is queued behind the holder.
 * @lock: ticket lock to inspect.
 *
 * Returns non-zero when the signed 16-bit distance between the two halves
 * of one atomic_read() sample of the lock word is greater than one.
 */
static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	u32 snapshot = atomic_read(lock);
	/* Narrow to s16 so the distance is taken modulo 2^16, as a signed value. */
	s16 distance = (s16)((snapshot >> 16) - (snapshot & 0xffff));

	return distance > 1;
}
/*
 * arch_spin_value_unlocked() - test a by-value copy of a lock word.
 * @lock: snapshot of the lock, passed by value.
 *
 * Returns 1 when arch_spin_is_locked() reports the copy as not held,
 * 0 otherwise.
 */
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	if (arch_spin_is_locked(&lock))
		return 0;

	return 1;
}
#include <asm/qrwlock.h>
#endif /* __ASM_GENERIC_SPINLOCK_H */

View file

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_GENERIC_SPINLOCK_TYPES_H
#define __ASM_GENERIC_SPINLOCK_TYPES_H
#include <linux/types.h>
/*
 * The ticket lock is a single atomic_t. asm-generic/spinlock.h treats the
 * upper 16 bits as the next ticket to hand out and the lower 16 bits as the
 * ticket currently being served.
 */
typedef atomic_t arch_spinlock_t;
/*
* qrwlock_types depends on arch_spinlock_t, so we must typedef that before the
* include.
*/
#include <asm/qrwlock_types.h>
#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0)
#endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */