futex: Add sys_futex_wait()

To complement sys_futex_waitv()/wake(), add sys_futex_wait(). This
syscall implements what was previously known as FUTEX_WAIT_BITSET
except it uses 'unsigned long' for the value and bitmask arguments,
takes timespec and clockid_t arguments for the absolute timeout and
uses FUTEX2 flags.

The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net
This commit is contained in:
peterz@infradead.org 2023-09-21 12:45:12 +02:00 committed by Peter Zijlstra
parent 43adf84495
commit cb8c4312af
24 changed files with 156 additions and 57 deletions

View File

@ -493,3 +493,4 @@
561 common cachestat sys_cachestat
562 common fchmodat2 sys_fchmodat2
563 common futex_wake sys_futex_wake
564 common futex_wait sys_futex_wait

View File

@ -467,3 +467,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#define __NR_compat_syscalls 455
#define __NR_compat_syscalls 456
#endif
#define __ARCH_WANT_SYS_CLONE

View File

@ -913,6 +913,8 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
#define __NR_futex_wake 454
__SYSCALL(__NR_futex_wake, sys_futex_wake)
#define __NR_futex_wait 455
__SYSCALL(__NR_futex_wait, sys_futex_wait)
/*
* Please add new compat syscalls above this comment and update

View File

@ -374,3 +374,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -453,3 +453,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -459,3 +459,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -392,3 +392,4 @@
451 n32 cachestat sys_cachestat
452 n32 fchmodat2 sys_fchmodat2
454 n32 futex_wake sys_futex_wake
455 n32 futex_wait sys_futex_wait

View File

@ -368,3 +368,4 @@
451 n64 cachestat sys_cachestat
452 n64 fchmodat2 sys_fchmodat2
454 n64 futex_wake sys_futex_wake
455 n64 futex_wait sys_futex_wait

View File

@ -441,3 +441,4 @@
451 o32 cachestat sys_cachestat
452 o32 fchmodat2 sys_fchmodat2
454 o32 futex_wake sys_futex_wake
455 o32 futex_wait sys_futex_wait

View File

@ -452,3 +452,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -540,3 +540,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -456,3 +456,4 @@
451 common cachestat sys_cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait sys_futex_wait

View File

@ -456,3 +456,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -499,3 +499,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -458,3 +458,4 @@
451 i386 cachestat sys_cachestat
452 i386 fchmodat2 sys_fchmodat2
454 i386 futex_wake sys_futex_wake
455 i386 futex_wait sys_futex_wait

View File

@ -376,6 +376,7 @@
452 common fchmodat2 sys_fchmodat2
453 64 map_shadow_stack sys_map_shadow_stack
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
#
# Due to a historical design error, certain syscalls are numbered differently

View File

@ -424,3 +424,4 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait

View File

@ -552,6 +552,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags);
asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask,
unsigned int flags, struct __kernel_timespec __user *timespec,
clockid_t clockid);
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
struct __kernel_timespec __user *rmtp);
asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,

View File

@ -824,9 +824,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
#define __NR_futex_wake 454
__SYSCALL(__NR_futex_wake, sys_futex_wake)
#define __NR_futex_wait 455
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#undef __NR_syscalls
#define __NR_syscalls 455
#define __NR_syscalls 456
/*
* 32 bit systems traditionally used different

View File

@ -332,6 +332,9 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
u32 __user *uaddr2, int nr_wake, int nr_requeue,
u32 *cmpval, int requeue_pi);
extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
struct hrtimer_sleeper *to, u32 bitset);
extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
ktime_t *abs_time, u32 bitset);

View File

@ -221,6 +221,46 @@ static int futex_parse_waitv(struct futex_vector *futexv,
return 0;
}
/*
 * Shared timeout setup for the futex2 syscalls.
 *
 * @timeout:	optional user pointer to an absolute timeout
 * @clockid:	CLOCK_REALTIME or CLOCK_MONOTONIC
 * @to:		caller-provided sleeper handed to futex_setup_timer()
 *
 * Returns 0 when @timeout is NULL (nothing is set up) or when the sleeper was
 * set up, -EINVAL for any other @clockid, -EFAULT when @timeout cannot be
 * copied in, or the error reported by futex_init_timeout().
 */
static int futex2_setup_timeout(struct __kernel_timespec __user *timeout,
				clockid_t clockid, struct hrtimer_sleeper *to)
{
	int timer_flags = 0, init_flags = 0;
	struct timespec64 ts;
	ktime_t expires;
	int err;

	if (!timeout)
		return 0;

	switch (clockid) {
	case CLOCK_REALTIME:
		timer_flags = FLAGS_CLOCKRT;
		init_flags = FUTEX_CLOCK_REALTIME;
		break;
	case CLOCK_MONOTONIC:
		/* Both flag sets stay zero for the monotonic clock. */
		break;
	default:
		return -EINVAL;
	}

	if (get_timespec64(&ts, timeout))
		return -EFAULT;

	/*
	 * The futex2 syscalls have no opcode of their own, so borrow
	 * FUTEX_WAIT_BITSET, which uses an absolute timeout as well.
	 */
	err = futex_init_timeout(FUTEX_WAIT_BITSET, init_flags, &ts, &expires);
	if (err)
		return err;

	futex_setup_timer(&expires, to, timer_flags, 0);
	return 0;
}
/*
 * Counterpart of futex2_setup_timeout(): cancel the sleeper's timer and then
 * destroy the on-stack hrtimer. Callers only invoke this when a timeout was
 * actually supplied.
 */
static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to)
{
	struct hrtimer *timer = &to->timer;

	hrtimer_cancel(timer);
	destroy_hrtimer_on_stack(timer);
}
/**
* sys_futex_waitv - Wait on a list of futexes
* @waiters: List of futexes to wait on
@ -250,8 +290,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
{
struct hrtimer_sleeper to;
struct futex_vector *futexv;
struct timespec64 ts;
ktime_t time;
int ret;
/* This syscall supports no flags for now */
@ -261,30 +299,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
return -EINVAL;
if (timeout) {
int flag_clkid = 0, flag_init = 0;
if (clockid == CLOCK_REALTIME) {
flag_clkid = FLAGS_CLOCKRT;
flag_init = FUTEX_CLOCK_REALTIME;
}
if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
return -EINVAL;
if (get_timespec64(&ts, timeout))
return -EFAULT;
/*
* Since there's no opcode for futex_waitv, use
* FUTEX_WAIT_BITSET that uses absolute timeout as well
*/
ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
if (ret)
return ret;
futex_setup_timer(&time, &to, flag_clkid, 0);
}
if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
return ret;
futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
if (!futexv) {
@ -299,10 +315,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
kfree(futexv);
destroy_timer:
if (timeout) {
hrtimer_cancel(&to.timer);
destroy_hrtimer_on_stack(&to.timer);
}
if (timeout)
futex2_destroy_timeout(&to);
return ret;
}
@ -336,6 +350,52 @@ SYSCALL_DEFINE4(futex_wake,
return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask);
}
/*
 * sys_futex_wait - Wait on a futex
 * @uaddr:	Address of the futex to wait on
 * @val:	Value of @uaddr
 * @mask:	bitmask
 * @flags:	FUTEX2 flags
 * @timeout:	Optional absolute timeout
 * @clockid:	Clock to be used for the timeout, realtime or monotonic
 *
 * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
 * futex2 family of calls.
 *
 * Returns -EINVAL when @flags has bits outside FUTEX2_VALID_MASK, when the
 * converted flags are rejected by futex_flags_valid(), or when @val or @mask
 * fail futex_validate_input(). A timeout setup failure is returned as is;
 * otherwise the result of __futex_wait() is returned.
 */
SYSCALL_DEFINE6(futex_wait,
		void __user *, uaddr,
		unsigned long, val,
		unsigned long, mask,
		unsigned int, flags,
		struct __kernel_timespec __user *, timeout,
		clockid_t, clockid)
{
	struct hrtimer_sleeper to;
	/* Stays NULL unless the caller supplied a timeout. */
	struct hrtimer_sleeper *sleeper = NULL;
	int ret;

	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	/* From here on @flags holds the internal representation. */
	flags = futex2_to_flags(flags);
	if (!futex_flags_valid(flags))
		return -EINVAL;

	if (!(futex_validate_input(flags, val) &&
	      futex_validate_input(flags, mask)))
		return -EINVAL;

	if (timeout) {
		ret = futex2_setup_timeout(timeout, clockid, &to);
		if (ret)
			return ret;
		sleeper = &to;
	}

	ret = __futex_wait(uaddr, flags, val, sleeper, mask);

	/* Only tear down the timer if one was set up above. */
	if (sleeper)
		futex2_destroy_timeout(sleeper);

	return ret;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(set_robust_list,
struct compat_robust_list_head __user *, head,

View File

@ -632,20 +632,18 @@ retry_private:
return ret;
}
int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
struct hrtimer_sleeper *to, u32 bitset)
{
struct hrtimer_sleeper timeout, *to;
struct restart_block *restart;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
struct futex_hash_bucket *hb;
int ret;
if (!bitset)
return -EINVAL;
q.bitset = bitset;
to = futex_setup_timer(abs_time, &timeout, flags,
current->timer_slack_ns);
retry:
/*
* Prepare to wait on uaddr. On success, it holds hb->lock and q
@ -653,18 +651,17 @@ retry:
*/
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
if (ret)
goto out;
return ret;
/* futex_queue and wait for wakeup, timeout, or a signal. */
futex_wait_queue(hb, &q, to);
/* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0;
if (!futex_unqueue(&q))
goto out;
ret = -ETIMEDOUT;
return 0;
if (to && !to->task)
goto out;
return -ETIMEDOUT;
/*
* We expect signal_pending(current), but we might be the
@ -673,24 +670,38 @@ retry:
if (!signal_pending(current))
goto retry;
ret = -ERESTARTSYS;
if (!abs_time)
goto out;
return -ERESTARTSYS;
}
restart = &current->restart_block;
restart->futex.uaddr = uaddr;
restart->futex.val = val;
restart->futex.time = *abs_time;
restart->futex.bitset = bitset;
restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
{
struct hrtimer_sleeper timeout, *to;
struct restart_block *restart;
int ret;
ret = set_restart_fn(restart, futex_wait_restart);
to = futex_setup_timer(abs_time, &timeout, flags,
current->timer_slack_ns);
out:
if (to) {
hrtimer_cancel(&to->timer);
destroy_hrtimer_on_stack(&to->timer);
ret = __futex_wait(uaddr, flags, val, to, bitset);
/* No timeout, nothing to clean up. */
if (!to)
return ret;
hrtimer_cancel(&to->timer);
destroy_hrtimer_on_stack(&to->timer);
if (ret == -ERESTARTSYS) {
restart = &current->restart_block;
restart->futex.uaddr = uaddr;
restart->futex.val = val;
restart->futex.time = *abs_time;
restart->futex.bitset = bitset;
restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
return set_restart_fn(restart, futex_wait_restart);
}
return ret;
}

View File

@ -88,6 +88,7 @@ COND_SYSCALL(get_robust_list);
COND_SYSCALL_COMPAT(get_robust_list);
COND_SYSCALL(futex_waitv);
COND_SYSCALL(futex_wake);
COND_SYSCALL(futex_wait);
COND_SYSCALL(kexec_load);
COND_SYSCALL_COMPAT(kexec_load);
COND_SYSCALL(init_module);