Re: [PATCH 17/17] powerpc/qspinlock: provide accounting and options for sleepy locks
From: "Nicholas Piggin" <npiggin@gmail.com>
Date: 2022-11-10 11:42:33
On Thu Nov 10, 2022 at 10:44 AM AEST, Jordan Niethe wrote:
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: [resend as utf-8, not utf-7]quoted
Finding the owner or a queued waiter on a lock with a preempted vcpu is indicative of an oversubscribed guest causing the lock to get into trouble. Provide some options to detect this situation and have new CPUs avoid queueing for a longer time (more steal iterations) to minimise the problems caused by vcpu preemption on the queue. --- arch/powerpc/include/asm/qspinlock_types.h | 7 +- arch/powerpc/lib/qspinlock.c | 240 +++++++++++++++++++-- 2 files changed, 232 insertions(+), 15 deletions(-)diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 35f9525381e6..4fbcc8a4230b 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h@@ -30,7 +30,7 @@ typedef struct qspinlock { * * 0: locked bit * 1-14: lock holder cpu - * 15: unused bit + * 15: lock owner or queuer vcpus observed to be preempted bit * 16: must queue bit * 17-31: tail cpu (+1) */@@ -49,6 +49,11 @@ typedef struct qspinlock { #error "qspinlock does not support such large CONFIG_NR_CPUS" #endif +#define _Q_SLEEPY_OFFSET 15 +#define _Q_SLEEPY_BITS 1 +#define _Q_SLEEPY_MASK _Q_SET_MASK(SLEEPY_OWNER) +#define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET) + #define _Q_MUST_Q_OFFSET 16 #define _Q_MUST_Q_BITS 1 #define _Q_MUST_Q_MASK _Q_SET_MASK(MUST_Q)diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 5cfd69931e31..c18133c01450 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c@@ -5,6 +5,7 @@ #include <linux/percpu.h> #include <linux/smp.h> #include <linux/topology.h> +#include <linux/sched/clock.h> #include <asm/qspinlock.h> #include <asm/paravirt.h>@@ -36,24 +37,54 @@ static int HEAD_SPINS __read_mostly = (1<<8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky 
__read_mostly = false;
The sticky part could potentially be its own patch.
I'll see how that looks.
quoted
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); -static __always_inline int get_steal_spins(bool paravirt, bool remote) +static __always_inline bool recently_sleepy(void) +{Other users of pv_sleepy_lock_interval_ns first check pv_sleepy_lock.
In this case it should be implied; I've added a comment.
quoted
+ if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; +} + +static __always_inline int get_steal_spins(bool paravirt, bool remote, bool sleepy)
It seems like paravirt is implied by sleepy.
quoted
{ if (remote) { - return REMOTE_STEAL_SPINS; + if (paravirt && sleepy) + return REMOTE_STEAL_SPINS * pv_sleepy_lock_factor; + else + return REMOTE_STEAL_SPINS; } else { - return STEAL_SPINS; + if (paravirt && sleepy) + return STEAL_SPINS * pv_sleepy_lock_factor; + else + return STEAL_SPINS; } }
I think that separate functions would still be nicer, but this could get rid of the nested conditionals, like:

	int spins;

	if (remote)
		spins = REMOTE_STEAL_SPINS;
	else
		spins = STEAL_SPINS;

	if (sleepy)
		return spins * pv_sleepy_lock_factor;
	return spins;
Yeah it was getting a bit out of hand.
quoted
-static __always_inline int get_head_spins(bool paravirt) +static __always_inline int get_head_spins(bool paravirt, bool sleepy) { - return HEAD_SPINS; + if (paravirt && sleepy) + return HEAD_SPINS * pv_sleepy_lock_factor; + else + return HEAD_SPINS; } static inline u32 encode_tail_cpu(void)@@ -206,6 +237,60 @@ static __always_inline u32 lock_clear_mustq(struct qspinlock *lock) return prev; } +static __always_inline bool lock_try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # lock_try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. %3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + if (prev == old) + return true; + return false; +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + lock_try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(struct qspinlock *lock) +{If yield_to_prev() was made to take a raw val, that val could be passed to seen_sleepy_node() and it would not need to get it by itself.
Yep.
quoted
+ if (pv_sleepy_lock) { + u32 val = READ_ONCE(lock->val); + + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (val & _Q_LOCKED_VAL) { + if (!(val & _Q_SLEEPY_VAL)) + lock_try_set_sleepy(lock, val); + } + } +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = get_tail_cpu(val);@@ -244,6 +329,7 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 spin_end(); + seen_sleepy_owner(lock, val); *preempted = true; /*@@ -307,11 +393,13 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int } } -static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt, bool *preempted) { u32 yield_count; int yield_cpu; + *preempted = false; + if (!paravirt) goto relax;@@ -332,6 +420,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * spin_end(); + *preempted = true; + seen_sleepy_node(lock); + smp_rmb(); if (yield_cpu == node->yield_cpu) {@@ -353,6 +444,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * spin_end(); + *preempted = true; + seen_sleepy_node(lock); + smp_rmb(); /* See yield_to_locked_owner comment */ if (!node->locked) {@@ -369,6 +463,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { + bool preempted; + bool seen_preempted = false; + bool sleepy = false; int iters = 0; if (!STEAL_SPINS) {@@ -376,7 +473,6 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav spin_begin(); for (;;) { u32 val = READ_ONCE(lock->val); - bool preempted; if (val & _Q_MUST_Q_VAL) break;@@ -395,7 +491,6 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool 
parav spin_begin(); for (;;) { u32 val = READ_ONCE(lock->val); - bool preempted; if (val & _Q_MUST_Q_VAL) break;@@ -408,9 +503,29 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav continue; } + if (paravirt && pv_sleepy_lock && !sleepy) { + if (!sleepy) {
The enclosing conditional means this would always be true. I think the outer conditional should be if (paravirt && pv_sleepy_lock), otherwise the pv_sleepy_lock_sticky part wouldn't work properly.
Good catch, I think you're right.
quoted
+ if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (lock_try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + + yield_to_locked_owner(lock, val, paravirt, &preempted); + if (preempted) + seen_preempted = true;
This could belong to the next if statement; there cannot be a case where !paravirt && preempted, can there?
Yep. Thanks, Nick