diff --git a/lib/rwsem.c b/lib/rwsem.c index 9a675fa9d78e..bbe48c04f363 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -4,6 +4,7 @@ * Derived from arch/i386/kernel/semaphore.c * * Writer lock-stealing by Alex Shi + * and Michel Lespinasse */ #include #include @@ -41,13 +42,11 @@ struct rwsem_waiter { enum rwsem_waiter_type type; }; -/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and - * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held - * since the rwsem value was observed. - */ -#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ -#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ -#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ +enum rwsem_wake_type { + RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ + RWSEM_WAKE_READERS, /* Wake readers only */ + RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ +}; /* * handle the lock release when processes blocked on it that can now run @@ -60,16 +59,16 @@ struct rwsem_waiter { * - writers are only woken if downgrading is false */ static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) +__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long woken, loop, adjustment; + signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type != RWSEM_WAKE_READ_OWNED) + if (wake_type == RWSEM_WAKE_ANY) /* Wake writer at the front of the queue, but do not * grant it the lock yet as we want other writers * to be able to steal it. Readers, on the other hand, @@ -79,24 +78,24 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) goto out; } - /* If we come here from up_xxxx(), another thread might have reached - * rwsem_down_failed_common() before we acquired the spinlock and - * woken up a waiter, making it now active. We prefer to check for - * this first in order to not spend too much time with the spinlock - * held if we're not going to be able to wake up readers in the end. - * - * Note that we do not need to update the rwsem count: any writer - * trying to acquire rwsem will run rwsem_down_write_failed() due - * to the waiting threads and block trying to acquire the spinlock. - * - * We use a dummy atomic update in order to acquire the cache line - * exclusively since we expect to succeed and run the final rwsem - * count adjustment pretty soon. + /* Writers might steal the lock before we grant it to the next reader. + * We prefer to do the first reader grant before counting readers + * so we can bail out early if a writer stole the lock. */ - if (wake_type == RWSEM_WAKE_ANY && - rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) - /* Someone grabbed the sem for write already */ - goto out; + adjustment = 0; + if (wake_type != RWSEM_WAKE_READ_OWNED) { + adjustment = RWSEM_ACTIVE_READ_BIAS; + try_reader_grant: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { + /* A writer stole the lock. Undo our reader grant. */ + if (rwsem_atomic_update(-adjustment, sem) & + RWSEM_ACTIVE_MASK) + goto out; + /* Last active locker left. Retry waking readers. */ + goto try_reader_grant; + } + } /* Grant an infinite number of read locks to the readers at the front * of the queue. Note we increment the 'active part' of the count by @@ -114,12 +113,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; if (waiter->type != RWSEM_WAITING_FOR_WRITE) /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(adjustment, sem); + if (adjustment) + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; loop = woken; @@ -164,8 +164,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) count = rwsem_atomic_update(adjustment, sem); /* If there are no active locks, wake the front queued process(es). */ - if (count == RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + if (!(count & RWSEM_ACTIVE_MASK)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); raw_spin_unlock_irq(&sem->wait_lock); @@ -209,7 +209,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) * any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS && adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); /* wait until we successfully acquire the lock */ set_task_state(tsk, TASK_UNINTERRUPTIBLE);