Thread (78 messages) 78 messages, 4 authors, 2022-11-10

Re: [PATCH 17/17] powerpc/qspinlock: provide accounting and options for sleepy locks

From: "Nicholas Piggin" <npiggin@gmail.com>
Date: 2022-11-10 11:42:33

On Thu Nov 10, 2022 at 10:44 AM AEST, Jordan Niethe wrote:
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
[resend as utf-8, not utf-7]
quoted
Finding the owner or a queued waiter on a lock with a preempted vcpu
is indicative of an oversubscribed guest causing the lock to get into
trouble. Provide some options to detect this situation and have new
CPUs avoid queueing for a longer time (more steal iterations) to
minimise the problems caused by vcpu preemption on the queue.
---
 arch/powerpc/include/asm/qspinlock_types.h |   7 +-
 arch/powerpc/lib/qspinlock.c               | 240 +++++++++++++++++++--
 2 files changed, 232 insertions(+), 15 deletions(-)
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index 35f9525381e6..4fbcc8a4230b 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -30,7 +30,7 @@ typedef struct qspinlock {
  *
  *     0: locked bit
  *  1-14: lock holder cpu
- *    15: unused bit
+ *    15: lock owner or queuer vcpus observed to be preempted bit
  *    16: must queue bit
  * 17-31: tail cpu (+1)
  */
@@ -49,6 +49,11 @@ typedef struct qspinlock {
 #error "qspinlock does not support such large CONFIG_NR_CPUS"
 #endif
 
+#define _Q_SLEEPY_OFFSET	15
+#define _Q_SLEEPY_BITS		1
+#define _Q_SLEEPY_MASK		_Q_SET_MASK(SLEEPY_OWNER)
+#define _Q_SLEEPY_VAL		(1U << _Q_SLEEPY_OFFSET)
+
 #define _Q_MUST_Q_OFFSET	16
 #define _Q_MUST_Q_BITS		1
 #define _Q_MUST_Q_MASK		_Q_SET_MASK(MUST_Q)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 5cfd69931e31..c18133c01450 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -5,6 +5,7 @@
 #include <linux/percpu.h>
 #include <linux/smp.h>
 #include <linux/topology.h>
+#include <linux/sched/clock.h>
 #include <asm/qspinlock.h>
 #include <asm/paravirt.h>
 
@@ -36,24 +37,54 @@ static int HEAD_SPINS __read_mostly = (1<<8);
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
The sticky part could potentially be its own patch.
I'll see how that looks.
quoted
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+static int pv_sleepy_lock_factor __read_mostly = 256;
 static bool pv_yield_prev __read_mostly = true;
 static bool pv_yield_propagate_owner __read_mostly = true;
 static bool pv_prod_head __read_mostly = false;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
 
-static __always_inline int get_steal_spins(bool paravirt, bool remote)
+static __always_inline bool recently_sleepy(void)
+{
Other users of pv_sleepy_lock_interval_ns first check pv_sleepy_lock.
In this case it should be implied; I've added a comment.
quoted
+	if (pv_sleepy_lock_interval_ns) {
+		u64 seen = this_cpu_read(sleepy_lock_seen_clock);
+
+		if (seen) {
+			u64 delta = sched_clock() - seen;
+			if (delta < pv_sleepy_lock_interval_ns)
+				return true;
+			this_cpu_write(sleepy_lock_seen_clock, 0);
+		}
+	}
+
+	return false;
+}
+
+static __always_inline int get_steal_spins(bool paravirt, bool remote, bool sleepy)
It seems like paravirt is implied by sleepy.
quoted
 {
 	if (remote) {
-		return REMOTE_STEAL_SPINS;
+		if (paravirt && sleepy)
+			return REMOTE_STEAL_SPINS * pv_sleepy_lock_factor;
+		else
+			return REMOTE_STEAL_SPINS;
 	} else {
-		return STEAL_SPINS;
+		if (paravirt && sleepy)
+			return STEAL_SPINS * pv_sleepy_lock_factor;
+		else
+			return STEAL_SPINS;
 	}
 }
I think that separate functions would still be nicer, but this could get rid of
the nested conditionals, like


	int spins;
	if (remote)
		spins = REMOTE_STEAL_SPINS;
	else
		spins = STEAL_SPINS;

	if (sleepy)
		return spins * pv_sleepy_lock_factor;
	return spins;
Yeah it was getting a bit out of hand.
quoted
 
-static __always_inline int get_head_spins(bool paravirt)
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
 {
-	return HEAD_SPINS;
+	if (paravirt && sleepy)
+		return HEAD_SPINS * pv_sleepy_lock_factor;
+	else
+		return HEAD_SPINS;
 }
 
 static inline u32 encode_tail_cpu(void)
@@ -206,6 +237,60 @@ static __always_inline u32 lock_clear_mustq(struct qspinlock *lock)
 	return prev;
 }
 
+static __always_inline bool lock_try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+	u32 prev;
+	u32 new = old | _Q_SLEEPY_VAL;
+
+	BUG_ON(!(old & _Q_LOCKED_VAL));
+	BUG_ON(old & _Q_SLEEPY_VAL);
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# lock_try_set_sleepy			\n"
+"	cmpw	0,%0,%2							\n"
+"	bne-	2f							\n"
+"	stwcx.	%3,0,%1							\n"
+"	bne-	1b							\n"
+"2:									\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r"(old), "r" (new)
+	: "cr0", "memory");
+
+	if (prev == old)
+		return true;
+	return false;
+}
+
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+	if (pv_sleepy_lock) {
+		if (pv_sleepy_lock_interval_ns)
+			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+		if (!(val & _Q_SLEEPY_VAL))
+			lock_try_set_sleepy(lock, val);
+	}
+}
+
+static __always_inline void seen_sleepy_lock(void)
+{
+	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
+		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+static __always_inline void seen_sleepy_node(struct qspinlock *lock)
+{
If yield_to_prev() were made to take a raw val, that val could be passed to
seen_sleepy_node() and it would not need to read it itself.
Yep.
quoted
+	if (pv_sleepy_lock) {
+		u32 val = READ_ONCE(lock->val);
+
+		if (pv_sleepy_lock_interval_ns)
+			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+		if (val & _Q_LOCKED_VAL) {
+			if (!(val & _Q_SLEEPY_VAL))
+				lock_try_set_sleepy(lock, val);
+		}
+	}
+}
+
 static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = get_tail_cpu(val);
@@ -244,6 +329,7 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 
 	spin_end();
 
+	seen_sleepy_owner(lock, val);
 	*preempted = true;
 
 	/*
@@ -307,11 +393,13 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int
 	}
 }
 
-static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
+static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt, bool *preempted)
 {
 	u32 yield_count;
 	int yield_cpu;
 
+	*preempted = false;
+
 	if (!paravirt)
 		goto relax;
 
@@ -332,6 +420,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 
 	spin_end();
 
+	*preempted = true;
+	seen_sleepy_node(lock);
+
 	smp_rmb();
 
 	if (yield_cpu == node->yield_cpu) {
@@ -353,6 +444,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 
 	spin_end();
 
+	*preempted = true;
+	seen_sleepy_node(lock);
+
 	smp_rmb(); /* See yield_to_locked_owner comment */
 
 	if (!node->locked) {
@@ -369,6 +463,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
+	bool preempted;
+	bool seen_preempted = false;
+	bool sleepy = false;
 	int iters = 0;
 
 	if (!STEAL_SPINS) {
@@ -376,7 +473,6 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 			spin_begin();
 			for (;;) {
 				u32 val = READ_ONCE(lock->val);
-				bool preempted;
 
 				if (val & _Q_MUST_Q_VAL)
 					break;
@@ -395,7 +491,6 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 	spin_begin();
 	for (;;) {
 		u32 val = READ_ONCE(lock->val);
-		bool preempted;
 
 		if (val & _Q_MUST_Q_VAL)
 			break;
@@ -408,9 +503,29 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 			continue;
 		}
 
+		if (paravirt && pv_sleepy_lock && !sleepy) {
+			if (!sleepy) {
The enclosing conditional means this would always be true. I think the outer conditional should be
if (paravirt && pv_sleepy_lock)
otherwise the pv_sleepy_lock_sticky part wouldn't work properly.
Good catch, I think you're right.
quoted
+				if (val & _Q_SLEEPY_VAL) {
+					seen_sleepy_lock();
+					sleepy = true;
+				} else if (recently_sleepy()) {
+					sleepy = true;
+				}
+
+			if (pv_sleepy_lock_sticky && seen_preempted &&
+					!(val & _Q_SLEEPY_VAL)) {
+				if (lock_try_set_sleepy(lock, val))
+					val |= _Q_SLEEPY_VAL;
+			}
+
+
 		yield_to_locked_owner(lock, val, paravirt, &preempted);
+		if (preempted)
+			seen_preempted = true;
This could belong to the next if statement; there cannot be !paravirt && preempted, can there?
Yep.

Thanks,
Nick
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help