[PATCH v4] futex_waitv.2: new page
From: наб <hidden>
Date: 2026-02-11 04:00:28
Signed-off-by: Ahelenia Ziemiańska <redacted> --- man/man2/futex_waitv.2 | 426 +++++++++++++++++++++++++++++++++++++++++ man/man7/futex.7 | 9 +- 2 files changed, 433 insertions(+), 2 deletions(-) create mode 100644 man/man2/futex_waitv.2
diff --git u/man/man2/futex_waitv.2 p/man/man2/futex_waitv.2
new file mode 100644
index 000000000..b05eb08ef
--- /dev/null
+++ p/man/man2/futex_waitv.2@@ -0,0 +1,426 @@ +.\" Copyright, the authors of the Linux man-pages project +.\" +.\" SPDX-License-Identifier: MIT +.\" +.TH futex_waitv 2 (date) "Linux man-pages (unreleased)" +.SH NAME +futex_waitv \- wait for FUTEX_WAKE operation on multiple futexes +.SH LIBRARY +Standard C library +.RI ( libc ,\~ \-lc ) +.SH SYNOPSIS +.nf +.BR "#include <linux/futex.h>" " /* Definition of " "struct futex_waitv" " */" +.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */" +.B #include <unistd.h> +.B #include <time.h> +.P +.BR "long syscall(" "unsigned int nr_futexes;" +.BI " SYS_futex_waitv, struct futex_waitv " waiters [ nr_futexes ], +.BI " unsigned int " nr_futexes ", unsigned int " flags , +.BI " const struct timespec *_Nullable " timeout ", clockid_t " clockid ");" +.fi +.P +.EX +struct futex_waitv { + u64 val; /* Expected value at \f[I]uaddr\f[] */ + u64 uaddr; /* User address to wait on */ + u32 flags; /* Flags for this waiter */ + u32 __reserved; /* Align to u64 */ +}; +.EE +.SH DESCRIPTION +.\" This name is used internally in the kernel +Implements the FUTEX_WAIT_MULTIPLE operation, +analogous to a synchronous atomic parallel +.BR FUTEX_WAIT (2const) +or +.B FUTEX_WAIT_PRIVATE +on up to +.B FUTEX_WAITV_MAX +futex words. +For an overview of futexes, see +.BR futex (7); +for a description of the general interface, see +.BR futex (2); +for general minutiae of futex waiting, see the page above. +.P +This operation tests that the values at the +futex words pointed to by the addresses +.IR waiters []. uaddr +still contain respective expected values +.IR waiters []. val , +and if so, sleeps waiting for a +.BR FUTEX_WAKE (2const) +operation on any of the futex words, +and returns the index of +.I a +waiter whose futex was woken. +.P +If the thread starts to sleep, +it is considered a waiter on all given futex words. +If any of the futex values do not match their respective +.IR waiters []. 
val , +the call fails immediately with the error +.BR EAGAIN . +.P +If +.I timeout +is not NULL, +.I *timeout +specifies a deadline measured against clock +.IR clockid . +This deadline will be rounded up to the system clock granularity, +and is guaranteed not to expire early. +If +.I timeout +is NULL, the call blocks indefinitely. +.P +Futex words to monitor are given by +.IR "struct futex_waitv" , +whose fields are analogous to +.BR FUTEX_WAIT (2const) +parameters, except +.I __reserved +must be 0 +and +.I flags +must contain one of +.BR FUTEX2_SIZE_* +ORed with some of the flags below. +.P +C programs should assign to +.I uaddr +by casting a pointer to +.B uintptr_t +to ensure the top bits are cleared on 32-bit systems. +.TP +.BR FUTEX2_SIZE_U32 +.I val +and +.I *uaddr +are 32-bit unsigned integers. +.TP +.B FUTEX2_NUMA +The futex word is followed by another word of the same size +.RI ( uaddr +points to +.IR uint N _t[2] +rather than +.IR uint N _t ; +the word is given by +.IR uaddr[1] ), +which can be either +.B FUTEX_NO_NODE +(all bits set) +or a NUMA node number. +.IP +If the NUMA word is +.BR FUTEX_NO_NODE , +the node number of the processor the syscall executes on is written to it. +(Except in an +.B EINVAL +or +.B EFAULT +condition, this happens to all waiters whose +.I flags +have +.B FUTEX2_NUMA +set.) +.IP +Futexes are placed on the NUMA node given by the NUMA word. +Futexes without this flag are placed on a random node. 
+.\" commit cec199c5e39bde7191a08087cc3d002ccfab31ff +.\" Author: Peter Zijlstra <peterz@infradead.org> +.\" Date: Wed Apr 16 18:29:16 2025 +0200 +.\" +.\" futex: Implement FUTEX2_NUMA +.\" +.\" FUTEX2_MPOL is not documented or used anywhere; +.\" it's unclear to me what it does +.\" (defined in commit c042c505210dc3453f378df432c10fff3d471bc5 +.\" "futex: Implement FUTEX2_MPOL") +.TP +.B FUTEX2_PRIVATE +By default, the futex is shared +.RB "(like " FUTEX_WAIT (2const)), +and can be accessed by multiple processes; +this flag waits on a private futex word, +where all users must use the same virtual memory map +(like +.BR FUTEX_WAIT_PRIVATE ; +this most often means they are part of the same process). +Private futexes are faster than shared ones. +.\" +.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.\" +.SH RETURN VALUE +Returns an index to an arbitrary entry in +.I waiters +corresponding to some woken-up futex. +This implies no information about other waiters. +.P +On error, +\-1 is returned, +and +.I errno +is set to indicate the error. +.\" +.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.\" +.SH ERRORS +.TP +.B EFAULT +.I waiters +points outside the accessible address space. +.TP +.B EFAULT +.I timeout +was not NULL and did not point to a valid user-space address. +.TP +.B EFAULT +Any +.IR waiters []. uaddr +field is not a valid user-space address. +.TP +.B EINVAL +Any +.IR waiters []. uaddr +field does not point to a valid object\[em]that is, +the address is not aligned appropriately for the specified +.BR FUTEX2_SIZE_* . +.TP +.B EINVAL +.I flags +was not 0. +.TP +.B EINVAL +.I nr_futexes +was not in +[1, +.B FUTEX_WAITV_MAX +(128)]. +.TP +.B EINVAL +.I timeout +was not NULL and +.I clockid +was not a valid clock +.RB ( CLOCK_MONOTONIC +or +.BR CLOCK_REALTIME ). +.TP +.B EINVAL +.I *timeout +is denormal (before epoch or +.I tv_nsec +more than 999'999'999). +.TP +.B EINVAL +Any +.IR waiters []. 
flags +field contains an unknown flag. +.TP +.B EINVAL +Any +.IR waiters []. flags +field is missing a +.B FUTEX2_SIZE_* +flag or has a size flag different than +.BR FUTEX2_SIZE_U32 +set. +.TP +.B EINVAL +Any +.IR waiters []. __reserved +field is not 0. +.TP +.B EINVAL +Any +.IR waiters []. val +field has more bits set than permitted by the size flags. +.TP +.B EINVAL +.B FUTEX2_NUMA +was set in +.IR waiters []. flags , +and the NUMA word +(which is the same size as the futex word) +is too small to contain the index of the biggest NUMA domain +(for example, +.B FUTEX2_SIZE_U8 +and there are more than 255 NUMA domains). +.TP +.B EINVAL +.B FUTEX2_NUMA +was set in +.IR waiters []. flags , +and the NUMA word is larger than the maximum possible NUMA node and not +.BR FUTEX_NO_NODE . +.TP +.B ETIMEDOUT +.I timeout +was not NULL and no futex was woken before the timeout elapsed. +.TP +.BR EAGAIN " or " EWOULDBLOCK +The value pointed to by any +.IR waiters []. uaddr +was not equal to the respective expected value +.IR waiters []. val +at the time of the call. +.TP +.B EINTR +The +operation was interrupted by a signal (see +.BR signal (7)). +.\" +.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.\" +.SH STANDARDS +Linux. +.SH NOTES +.BR FUTEX2_SIZE_U8 , +.BR FUTEX2_SIZE_U16 , +and +.B FUTEX2_SIZE_U64 +(where +.I val +and +.I *uaddr +are 8, 16, or 64 bits) +are defined, but not implemented +.RB ( EINVAL ). +.SH HISTORY +.\" commit bf69bad38cf63d980e8a603f8d1bd1f85b5ed3d9 +.\" Author: André Almeida <andrealmeid@igalia.com> +.\" Date: Thu Sep 23 14:11:05 2021 -0300 +.\" +.\" futex: Implement sys_futex_waitv() +Linux 5.16. +.SH EXAMPLES +The program below executes a linear-time operation on 10 threads, +displaying the results in real time, +waiting at most 1 second for each new result. +The first 3 threads operate on the same data (complete in the same time). +.B !\& +indicates the futex that woke up each +.BR futex_waitv (). 
+.in +4 +.EX +.RB $\~ ./futex_waitv +153 153 153 237 100 245 177 127 215 61 + 122! + 200! + 254! +306 306! + 306! + 354! + 430! + 474! + 490! +Connection timed out +.EE +.P +.\" SRC BEGIN (futex_waitv.c) +.EX +#include <errno.h> +#include <inttypes.h> +#include <linux/futex.h> +#include <pthread.h> +#include <stdatomic.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/syscall.h> +#include <time.h> +#include <unistd.h> +\& +static inline long +my_futex_wait_private(_Atomic uint32_t *uaddr, uint32_t val, + const struct timespec *_Nullable timeout) +{ + return syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, val, timeout); +} +\& +static inline long +my_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, + unsigned int flags, const struct timespec *_Nullable timeout, + clockid_t clockid) +{ + return syscall(SYS_futex_waitv, waiters, nr_futexes, flags, timeout, clockid); +} +\& +void * +worker(void *arg) +{ + _Atomic uint32_t *futex = arg; +\& + usleep(*futex * 10000); + *futex *= 2; + my_futex_wait_private(futex, 1, NULL); + return NULL; +} +\& +int +main(void) +{ + _Atomic uint32_t futexes[10]; + uint8_t init[countof(futexes)]; + struct futex_waitv waiters[countof(futexes)] = {}; + int i; +\& + getentropy(init, sizeof(init)); + init[0] = init[1] = init[2]; + for (i = 0; i < countof(futexes); ++i) { + printf("%" PRIu8 "\[rs]t", init[i]); + atomic_init(&futexes[i], init[i]); + pthread_create(&(pthread_t){}, NULL, worker, &futexes[i]); + } + putchar('\[rs]n'); +\& + for (i = 0; i < countof(futexes); ++i) { + waiters[i].val = futexes[i]; + waiters[i].uaddr = (uintptr_t)&futexes[i]; + waiters[i].flags = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE; + } + for (;;) { + struct timespec timeout; + int woke; +\& + clock_gettime(CLOCK_MONOTONIC, &timeout); + timeout.tv_sec += 1; +\& + woke = my_futex_waitv(waiters, countof(futexes), 0, &timeout, CLOCK_MONOTONIC); + if (woke == -1 && (errno != EAGAIN && errno != EWOULDBLOCK)) + 
break; +\& + for (i = 0; i < countof(futexes); ++i) { + if (futexes[i] != waiters[i].val) + printf("%" PRIu32 "%s", futexes[i], i == woke ? "!" : ""); + putchar('\[rs]t'); + } + putchar('\[rs]n'); +\& + for (i = 0; i < countof(futexes); ++i) + waiters[i].val = futexes[i]; + } + fprintf(stderr, "%s\[rs]n", strerror(errno)); +} +.EE +.\" SRC END +.SH SEE ALSO +.ad l +.BR futex (2), +.BR FUTEX_WAIT (2const), +.BR FUTEX_WAKE (2const), +.BR futex (7) +.P +The following kernel source files: +.IP \[bu] +.I Documentation/userspace-api/futex2.rst +.IP \[bu] +.I kernel/futex/syscall.c +.IP \[bu] +.I kernel/futex/waitwake.c +.IP \[bu] +.I kernel/futex/futex.h
diff --git u/man/man7/futex.7 p/man/man7/futex.7
index 51c5d5d9b..d271144ff 100644
--- u/man/man7/futex.7
+++ p/man/man7/futex.7@@ -45,7 +45,9 @@ .SS Semantics Any futex operation starts in user space, but it may be necessary to communicate with the kernel using the .BR futex (2) -system call. +or +.BR futex_waitv (2) +system calls. .P To "up" a futex, execute the proper assembler instructions that will cause the host CPU to atomically increment the integer.
@@ -72,7 +74,9 @@ .SS Semantics .P The .BR futex (2) -system call can optionally be passed a timeout specifying how long +and +.BR futex_waitv (2) +system calls can optionally be passed a timeout specifying how long the kernel should wait for the futex to be upped. In this case, semantics are more complex and the programmer is referred
@@ -107,6 +111,7 @@ .SH NOTES .SH SEE ALSO .BR clone (2), .BR futex (2), +.BR futex_waitv (2), .BR get_robust_list (2), .BR set_robust_list (2), .BR set_tid_address (2),
--
2.39.5
Attachments
- signature.asc [application/pgp-signature] 833 bytes