mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking fixes from Ingo Molnar:
 "This contains two qspinlock fixes and three documentation and comment
  fixes."

* 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/semaphore: Update the file path in documentation
  locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit()
  locking/qspinlock: Ensure node->count is updated before initialising node
  locking/qspinlock: Ensure node is initialised before updating prev->next
  Documentation/locking/mutex-design: Update to reflect latest changes
commit e9e3b3002f
5 changed files with 42 additions and 42 deletions
Documentation/atomic_bitops.txt
@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
 
  - RMW operations that have a return value are fully ordered.
 
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+   otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+   if the bit in memory is unchanged by the operation then it is deemed to have
+   failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
 clear_bit_unlock() which has RELEASE semantics.
 
 Since a platform only has a single means of achieving atomic operations
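Note: the clarified wording matters for code that builds locks out of conditional
bitops, since only the attempt that actually changes the bit provides ACQUIRE
ordering. Below is a minimal sketch of a bit lock along those lines; my_flags,
MY_LOCK_BIT, my_bit_lock() and my_bit_unlock() are hypothetical names used for
illustration and are not part of this commit.

#include <linux/bitops.h>

/* Hypothetical lock word and bit number, for illustration only. */
#define MY_LOCK_BIT	0
static unsigned long my_flags;

static void my_bit_lock(void)
{
	/*
	 * A failed attempt (bit already set) is unordered, which is fine:
	 * ordering is only relied upon once the lock has actually been taken.
	 */
	while (test_and_set_bit_lock(MY_LOCK_BIT, &my_flags))
		cpu_relax();
}

static void my_bit_unlock(void)
{
	/* RELEASE: stores in the critical section are visible before the clear. */
	clear_bit_unlock(MY_LOCK_BIT, &my_flags);
}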
Documentation/locking/mutex-design.txt
@@ -21,37 +21,23 @@ Implementation
 --------------
 
 Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-	  1: unlocked
-	  0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime. Field owner
+actually contains 'struct task_struct *' to the current lock owner and it is
+therefore NULL if not currently owned. Since task_struct pointers are aligned
+at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
+if waiter list is non-empty). In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
 
 When acquiring a mutex, there are three possible paths that can be
 taken, depending on the state of the lock:
 
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b                lock decl (%rbx)
-    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
-
-   the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07                lock incl (%rdi)
-    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+    the current task. This only works in the uncontended case (cmpxchg() checks
+    against 0UL, so all 3 state bits above have to be 0). If the lock is
+    contended it goes to the next possible path.
 
 (ii) midpath: aka optimistic spinning, tries to spin for acquisition
      while the lock owner is running and there are no other tasks ready
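Note: a rough sketch of the fastpath idea the new text describes follows. It is
not the kernel's actual implementation; my_mutex_fastpath_lock() is a hypothetical
helper, shown only to illustrate cmpxchg()ing the owner word with the current task.

#include <linux/atomic.h>
#include <linux/sched.h>

/*
 * Illustrative only: try to take the lock by installing the current task
 * as the owner. Because the low three bits of ->owner carry flags, the
 * lock is free only when the whole word is 0UL, so the cmpxchg() checks
 * "unlocked" and "no flag bits set" in one shot.
 */
static inline bool my_mutex_fastpath_lock(atomic_long_t *owner)
{
	unsigned long curr = (unsigned long)current;

	return atomic_long_cmpxchg_acquire(owner, 0UL, curr) == 0UL;
}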
@@ -143,11 +129,10 @@ Test if the mutex is taken:
 Disadvantages
 -------------
 
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
 
 When to use mutexes
 -------------------
include/asm-generic/bitops/lock.h
@@ -7,7 +7,8 @@
  * @nr: Bit to set
  * @addr: Address to count from
  *
- * This operation is atomic and provides acquire barrier semantics.
+ * This operation is atomic and provides acquire barrier semantics if
+ * the returned value is 0.
  * It can be used to implement bit locks.
  */
 #define test_and_set_bit_lock(nr, addr)	test_and_set_bit(nr, addr)
include/linux/semaphore.h
@@ -4,7 +4,7 @@
  *
  * Distributed under the terms of the GNU GPL, version 2
  *
- * Please see kernel/semaphore.c for documentation of these functions
+ * Please see kernel/locking/semaphore.c for documentation of these functions
  */
 #ifndef __LINUX_SEMAPHORE_H
 #define __LINUX_SEMAPHORE_H
kernel/locking/qspinlock.c
@@ -379,6 +379,14 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	tail = encode_tail(smp_processor_id(), idx);
 
 	node += idx;
+
+	/*
+	 * Ensure that we increment the head node->count before initialising
+	 * the actual node. If the compiler is kind enough to reorder these
+	 * stores, then an IRQ could overwrite our assignments.
+	 */
+	barrier();
+
 	node->locked = 0;
 	node->next = NULL;
 	pv_init_node(node);
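Note: barrier() is the kernel's compiler-only barrier; the hazard here is purely a
compiler one, because an IRQ taken on the same CPU observes program order. A
stripped-down sketch of the pattern being protected is shown below; struct
my_percpu_nodes and my_grab_node() are hypothetical names, not the qspinlock code.

#include <linux/compiler.h>

struct my_node {
	int		locked;
	struct my_node	*next;
};

struct my_percpu_nodes {
	int		count;		/* nesting level on this CPU */
	struct my_node	nodes[4];	/* one slot per nesting level */
};

static struct my_node *my_grab_node(struct my_percpu_nodes *pcpu)
{
	int idx = pcpu->count++;	/* publish the claim first ...          */

	barrier();			/* ... so the compiler cannot sink the  */
					/* increment below the stores that      */
					/* initialise the claimed slot          */
	pcpu->nodes[idx].locked = 0;
	pcpu->nodes[idx].next = NULL;
	return &pcpu->nodes[idx];
}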
@@ -408,14 +416,15 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 */
 	if (old & _Q_TAIL_MASK) {
 		prev = decode_tail(old);
-		/*
-		 * The above xchg_tail() is also a load of @lock which
-		 * generates, through decode_tail(), a pointer. The address
-		 * dependency matches the RELEASE of xchg_tail() such that
-		 * the subsequent access to @prev happens after.
-		 */
-		WRITE_ONCE(prev->next, node);
+
+		/*
+		 * We must ensure that the stores to @node are observed before
+		 * the write to prev->next. The address dependency from
+		 * xchg_tail is not sufficient to ensure this because the read
+		 * component of xchg_tail is unordered with respect to the
+		 * initialisation of @node.
+		 */
+		smp_store_release(&prev->next, node);
 
 		pv_wait_node(node, prev);
 		arch_mcs_spin_lock_contended(&node->locked);
 
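Note: the switch from WRITE_ONCE() to smp_store_release() is what guarantees that
the node's initialisation is visible to the previous tail before it can follow
->next. A hedged MCS-style sketch of why the publishing store needs RELEASE
semantics follows; struct my_mcs_node and my_mcs_enqueue() are illustrative names,
not the qspinlock implementation.

#include <linux/atomic.h>

struct my_mcs_node {
	struct my_mcs_node	*next;
	int			locked;
};

static void my_mcs_enqueue(struct my_mcs_node **tail, struct my_mcs_node *node)
{
	struct my_mcs_node *prev;

	node->next = NULL;
	node->locked = 0;

	/* Atomically swap ourselves in as the new tail; prev is the old tail. */
	prev = xchg(tail, node);
	if (prev) {
		/*
		 * RELEASE ensures the initialising stores above are observed
		 * before prev's owner can see the node through prev->next.
		 */
		smp_store_release(&prev->next, node);
	}
}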