[PATCH v9 10/12] bpf/rqspinlock: Use smp_cond_load_acquire_timeout()
From: Ankur Arora <hidden>
Date: 2026-02-09 02:32:55
Also in:
bpf, linux-arch, linux-pm, lkml
Subsystem:
bpf [general] (safe dynamic programs and tools), the rest · Maintainers:
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, Linus Torvalds
Switch out the conditional load interfaces used by rqspinlock
to smp_cond_read_acquire_timeout() and its wrapper,
atomic_cond_read_acquire_timeout().
Both these handle the timeout and amortize as needed, so use
clock_deadlock() directly instead of going through RES_CHECK_TIMEOUT().
For correctness, however, we need to ensure that the timeout case in
smp_cond_read_acquire_timeout() always agrees with that in
clock_deadlock(), which returns with -ETIMEDOUT.
For the most part, this is fine because smp_cond_load_acquire_timeout()
does not have an independent clock and does not do double reads from
clock_deadlock() which could cause its internal state to go out of
sync from that of clock_deadlock().
There is, however, an edge case where clock_deadlock() checks for:
if (time > ts->timeout_end)
return -ETIMEDOUT;
while smp_cond_load_acquire_timeout() checks for:
__time_now = (time_expr_ns);
if (__time_now <= 0 || __time_now >= __time_end) {
VAL = READ_ONCE(*__PTR);
break;
}
This runs into a problem when (__time_now == __time_end) since
clock_deadlock() does not treat it as a timeout condition but
the second clause in the conditional above does.
So, add an equality check in clock_deadlock().
Finally, redefine SMP_TIMEOUT_POLL_COUNT to be 16k to be similar to the
spin-count used in RES_CHECK_TIMEOUT(). We only do this for non-arm64
as that uses a waiting implementation.
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: bpf@vger.kernel.org
Signed-off-by: Ankur Arora <redacted>
---
kernel/bpf/rqspinlock.c | 37 ++++++++++++++++++++-----------------
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index ac9b3572e42f..2a361c4c7393 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c@@ -215,7 +215,7 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, } time = ktime_get_mono_fast_ns(); - if (time > ts->timeout_end) + if (time >= ts->timeout_end) return -ETIMEDOUT; /*
@@ -235,20 +235,14 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, } /* - * Do not amortize with spins when res_smp_cond_load_acquire is defined, - * as the macro does internal amortization for us. + * Amortize timeout check for busy-wait loops. */ -#ifndef res_smp_cond_load_acquire #define RES_CHECK_TIMEOUT(ts, ret, mask) \ ({ \ if (!(ts).spin++) \ (ret) = clock_deadlock((lock), (mask), &(ts));\ (ret); \ }) -#else -#define RES_CHECK_TIMEOUT(ts, ret, mask) \ - ({ (ret) = clock_deadlock((lock), (mask), &(ts)); }) -#endif /* * Initialize the 'spin' member.
@@ -262,6 +256,18 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, */ #define RES_RESET_TIMEOUT(ts, _duration) ({ (ts).timeout_end = 0; (ts).duration = _duration; }) +/* + * Limit how often we invoke clock_deadlock() while spin-waiting in + * smp_cond_load_acquire_timeout() or atomic_cond_read_acquire_timeout(). + * + * (ARM64 generally uses a waited implementation so we use the default + * value there.) + */ +#ifndef CONFIG_ARM64 +#undef SMP_TIMEOUT_POLL_COUNT +#define SMP_TIMEOUT_POLL_COUNT (16*1024) +#endif + /* * Provide a test-and-set fallback for cases when queued spin lock support is * absent from the architecture.
@@ -312,12 +318,6 @@ EXPORT_SYMBOL_GPL(resilient_tas_spin_lock); */ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]); -#ifndef res_smp_cond_load_acquire -#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c) -#endif - -#define res_atomic_cond_read_acquire(v, c) res_smp_cond_load_acquire(&(v)->counter, (c)) - /** * resilient_queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure
@@ -415,7 +415,9 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) */ if (val & _Q_LOCKED_MASK) { RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); - res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, timeout_err, _Q_LOCKED_MASK) < 0); + smp_cond_load_acquire_timeout(&lock->locked, !VAL, + (timeout_err = clock_deadlock(lock, _Q_LOCKED_MASK, &ts)), + ts.duration); } if (timeout_err < 0) {
@@ -577,8 +579,9 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) * us. */ RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2); - val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) || - RES_CHECK_TIMEOUT(ts, timeout_err, _Q_LOCKED_PENDING_MASK) < 0); + val = atomic_cond_read_acquire_timeout(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), + (timeout_err = clock_deadlock(lock, _Q_LOCKED_PENDING_MASK, &ts)), + ts.duration); /* Disable queue destruction when we detect deadlocks. */ if (timeout_err == -EDEADLK) {
--
2.31.1