Thread (78 messages) 78 messages, 4 authors, 2022-11-10

Re: [PATCH 06/17] powerpc/qspinlock: theft prevention to control latency

From: "Nicholas Piggin" <npiggin@gmail.com>
Date: 2022-11-10 10:58:16

On Thu Nov 10, 2022 at 10:40 AM AEST, Jordan Niethe wrote:
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
[resend as utf-8, not utf-7]
quoted
Give the queue head the ability to stop stealers. After a number of
spins without sucessfully acquiring the lock, the queue head employs
this, which will assure it is the next owner.
---
 arch/powerpc/include/asm/qspinlock_types.h | 10 +++-
 arch/powerpc/lib/qspinlock.c               | 56 +++++++++++++++++++++-
 2 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index 210adf05b235..8b20f5e22bba 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -29,7 +29,8 @@ typedef struct qspinlock {
  * Bitfields in the lock word:
  *
  *     0: locked bit
- * 16-31: tail cpu (+1)
+ *    16: must queue bit
+ * 17-31: tail cpu (+1)
  */
 #define	_Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
 				      << _Q_ ## type ## _OFFSET)
@@ -38,7 +39,12 @@ typedef struct qspinlock {
 #define _Q_LOCKED_MASK		_Q_SET_MASK(LOCKED)
 #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
 
-#define _Q_TAIL_CPU_OFFSET	16
+#define _Q_MUST_Q_OFFSET	16
+#define _Q_MUST_Q_BITS		1
+#define _Q_MUST_Q_MASK		_Q_SET_MASK(MUST_Q)
+#define _Q_MUST_Q_VAL		(1U << _Q_MUST_Q_OFFSET)
+
+#define _Q_TAIL_CPU_OFFSET	17
 #define _Q_TAIL_CPU_BITS	(32 - _Q_TAIL_CPU_OFFSET)
 #define _Q_TAIL_CPU_MASK	_Q_SET_MASK(TAIL_CPU)
Not a big deal but some of these values could be calculated like in the
generic version. e.g.

	#define _Q_PENDING_OFFSET	(_Q_LOCKED_OFFSET +_Q_LOCKED_BITS)
Yeah, we don't *really* have more than one locked bit though. Haven't
made up my mind about these defines yet.
quoted
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 1625cce714b2..a906cc8f15fa 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -22,6 +22,7 @@ struct qnodes {
 /* Tuning parameters */
 static int STEAL_SPINS __read_mostly = (1<<5);
 static bool MAYBE_STEALERS __read_mostly = true;
+static int HEAD_SPINS __read_mostly = (1<<8);
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -30,6 +31,11 @@ static __always_inline int get_steal_spins(void)
 	return STEAL_SPINS;
 }
 
+static __always_inline int get_head_spins(void)
+{
+	return HEAD_SPINS;
+}
+
 static inline u32 encode_tail_cpu(void)
 {
 	return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
@@ -142,6 +148,23 @@ static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
 	return prev;
 }
 
+static __always_inline u32 lock_set_mustq(struct qspinlock *lock)
+{
+	u32 new = _Q_MUST_Q_VAL;
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# lock_set_mustq			\n"
Is the EH bit not set because we don't hold the lock here?
Right, we're still waiting for it.
quoted
+"	or	%0,%0,%2						\n"
+"	stwcx.	%0,0,%1							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (new)
+	: "cr0", "memory");
This is another usage close to the DEFINE_TESTOP() pattern.
quoted
+
+	return prev;
+}
+
 static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = get_tail_cpu(val);
@@ -165,6 +188,9 @@ static inline bool try_to_steal_lock(struct qspinlock *lock)
 	for (;;) {
 		u32 val = READ_ONCE(lock->val);
 
+		if (val & _Q_MUST_Q_VAL)
+			break;
+
 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
 			if (trylock_with_tail_cpu(lock, val))
 				return true;
@@ -246,11 +272,22 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 		/* We must be the owner, just set the lock bit and acquire */
 		lock_set_locked(lock);
 	} else {
+		int iters = 0;
+		bool set_mustq = false;
+
 again:
 		/* We're at the head of the waitqueue, wait for the lock. */
-		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
+		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
 			cpu_relax();
 
+			iters++;
It seems instead of using set_mustq, (val & _Q_MUST_Q_VAL) could be checked?
I wanted to give the reader (and compiler for what that's worth) the
idea that it won't change concurrently after we set it.

Thanks,
Nick
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help