Re: [PATCH 03/17] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx.
From: Jordan Niethe <hidden>
Date: 2022-08-10 03:28:54
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
quoted hunk ↗ jump to hunk
The first 16 bits of the lock are only modified by the owner, and other modifications always use atomic operations on the entire 32 bits, so unlocks can use plain stores on the 16 bits. This is the same kind of optimisation done by core qspinlock code. --- arch/powerpc/include/asm/qspinlock.h | 6 +----- arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-)diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index f06117aa60e1..79a1936fb68d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h@@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { - for (;;) { - int val = atomic_read(&lock->val); - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) - return; - } + smp_store_release(&lock->locked, 0);
Is it also possible for lock_set_locked() to use a non-atomic acquire operation?
quoted hunk ↗ jump to hunk
} #define arch_spin_is_locked(l) queued_spin_is_locked(l)diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 9630e714c70d..3425dab42576 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h@@ -3,12 +3,27 @@ #define _ASM_POWERPC_QSPINLOCK_TYPES_H #include <linux/types.h> +#include <asm/byteorder.h> typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + +#ifdef __LITTLE_ENDIAN + struct { + u16 locked; + u8 reserved[2]; + }; +#else + struct { + u8 reserved[2]; + u16 locked; + }; +#endif + }; } arch_spinlock_t;
Just to double check we have: #define _Q_LOCKED_OFFSET 0 #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_MASK 0x00000001 #define _Q_LOCKED_VAL 1 #define _Q_TAIL_CPU_OFFSET 16 #define _Q_TAIL_CPU_BITS 16 #define _Q_TAIL_CPU_MASK 0xffff0000 so the ordering here looks correct.
-#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } }
/*
* Bitfields in the atomic value: