Re: [PATCH V2] powerpc: Implement {cmp}xchg for u8 and u16

From: Boqun Feng <hidden>
Date: 2016-04-19 09:18:29
Also in: lkml

Hi Xinhui,

On Tue, Apr 19, 2016 at 02:29:34PM +0800, Pan Xinhui wrote:

From: Pan Xinhui <redacted>
=20
Implement xchg{u8,u16}{local,relaxed}, and
cmpxchg{u8,u16}{,local,acquire,relaxed}.
=20
It works on all ppc.
=20

Nice work!

AFAICT, your work doesn't depend on anything that ppc-specific, right?
So maybe we can use it as a general approach for a fallback
implementation on the archs without u8/u16 atomics. ;-)

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Pan Xinhui <redacted>
---
change from V1:
	rework totally.
---
 arch/powerpc/include/asm/cmpxchg.h | 83 ++++++++++++++++++++++++++++++++=

++++++

quoted hunk ↗ jump to hunk

 1 file changed, 83 insertions(+)
=20

diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/as=

m/cmpxchg.h

quoted hunk ↗ jump to hunk

index 44efe73..79a1f45 100644

--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h

@@ -7,6 +7,37 @@
 #include <asm/asm-compat.h>
 #include <linux/bug.h>

=20
+#ifdef __BIG_ENDIAN
+#define BITOFF_CAL(size, off)	((sizeof(u32) - size - off) * BITS_PER_BYT=

E)

+#else
+#define BITOFF_CAL(size, off)	(off * BITS_PER_BYTE)
+#endif
+
+static __always_inline unsigned long
+__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
+			unsigned long new);
+
+#define __XCHG_GEN(cmp, type, sfx, u32sfx, skip, v)			\
+static __always_inline u32						\
+__##cmp##xchg_##type##sfx(v void *ptr, u32 old, u32 new)		\
+{									\
+	int size =3D sizeof (type);					\
+	int off =3D (unsigned long)ptr % sizeof(u32);			\
+	volatile u32 *p =3D ptr - off;					\
+	int bitoff =3D BITOFF_CAL(size, off);				\
+	u32 bitmask =3D ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff;	\
+	u32 oldv, newv;							\
+	u32 ret;							\
+	do {								\
+		oldv =3D READ_ONCE(*p);					\
+		ret =3D (oldv & bitmask) >> bitoff;			\
+		if (skip && ret !=3D old)					\
+			break;						\
+		newv =3D (oldv & ~bitmask) | (new << bitoff);		\
+	} while (__cmpxchg_u32##u32sfx((v void*)p, oldv, newv) !=3D oldv);\

Forgive me if this is too paranoid, but I think we can save the
READ_ONCE() in the loop if we change the code into the following,
because cmpxchg will return the "new" value, if the cmp part fails.

	newv =3D READ_ONCE(*p);

	do {
		oldv =3D newv;
		ret =3D (oldv & bitmask) >> bitoff;
		if (skip && ret !=3D old)
			break;
		newv =3D (oldv & ~bitmask) | (new << bitoff);
		newv =3D __cmpxchg_u32##u32sfx((void *)p, oldv, newv);
	} while(newv !=3D oldv);

quoted hunk ↗ jump to hunk

+	return ret;							\
+}
+
 /*
  * Atomic exchange
  *

@@ -14,6 +45,19 @@
  * the previous value stored there.
  */

=20
+#define XCHG_GEN(type, sfx, v)						\
+		__XCHG_GEN(_, type, sfx, _local, 0, v)			\

                                         ^^^^^^^

This should be sfx, right? Otherwise, all the newly added xchg will
call __cmpxchg_u32_local, this will result in wrong ordering guarantees.

+static __always_inline u32 __xchg_##type##sfx(v void *p, u32 n)	\
+{									\
+	return ___xchg_##type##sfx(p, 0, n);				\
+}
+
+XCHG_GEN(u8, _local, volatile);

I don't think we need the "volatile" modifier here, because READ_ONCE()
and __cmpxchg_u32_* all have "volatile" semantics IIUC, so maybe we can
save a paramter for the __XCHG_GEN macro.

Regards,
Boqun

quoted hunk ↗ jump to hunk

+XCHG_GEN(u8, _relaxed, );
+XCHG_GEN(u16, _local, volatile);
+XCHG_GEN(u16, _relaxed, );
+#undef XCHG_GEN
+
 static __always_inline unsigned long
 __xchg_u32_local(volatile void *p, unsigned long val)
 {

@@ -88,6 +132,10 @@ static __always_inline unsigned long
 __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
 {
 	switch (size) {
+	case 1:
+		return __xchg_u8_local(ptr, x);
+	case 2:
+		return __xchg_u16_local(ptr, x);
 	case 4:
 		return __xchg_u32_local(ptr, x);
 #ifdef CONFIG_PPC64
@@ -103,6 +151,10 @@ static __always_inline unsigned long
 __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
 {
 	switch (size) {
+	case 1:
+		return __xchg_u8_relaxed(ptr, x);
+	case 2:
+		return __xchg_u16_relaxed(ptr, x);
 	case 4:
 		return __xchg_u32_relaxed(ptr, x);
 #ifdef CONFIG_PPC64
@@ -226,6 +278,21 @@ __cmpxchg_u32_acquire(u32 *p, unsigned long old, uns=

igned long new)

 	return prev;
 }
=20
+
+#define CMPXCHG_GEN(type, sfx, v)				\
+	__XCHG_GEN(cmp, type, sfx, sfx, 1, v)
+
+CMPXCHG_GEN(u8, , volatile);
+CMPXCHG_GEN(u8, _local, volatile);
+CMPXCHG_GEN(u8, _relaxed, );
+CMPXCHG_GEN(u8, _acquire, );
+CMPXCHG_GEN(u16, , volatile);
+CMPXCHG_GEN(u16, _local, volatile);
+CMPXCHG_GEN(u16, _relaxed, );
+CMPXCHG_GEN(u16, _acquire, );
+#undef CMPXCHG_GEN
+#undef __XCHG_GEN
+
 #ifdef CONFIG_PPC64
 static __always_inline unsigned long
 __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned lon=

g new)

quoted hunk ↗ jump to hunk

@@ -316,6 +383,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, uns=

igned long new,

quoted hunk ↗ jump to hunk

 	  unsigned int size)
 {
 	switch (size) {
+	case 1:
+		return __cmpxchg_u8(ptr, old, new);
+	case 2:
+		return __cmpxchg_u16(ptr, old, new);
 	case 4:
 		return __cmpxchg_u32(ptr, old, new);
 #ifdef CONFIG_PPC64

@@ -332,6 +403,10 @@ __cmpxchg_local(volatile void *ptr, unsigned long ol=

d, unsigned long new,

quoted hunk ↗ jump to hunk

 	  unsigned int size)
 {
 	switch (size) {
+	case 1:
+		return __cmpxchg_u8_local(ptr, old, new);
+	case 2:
+		return __cmpxchg_u16_local(ptr, old, new);
 	case 4:
 		return __cmpxchg_u32_local(ptr, old, new);
 #ifdef CONFIG_PPC64

@@ -348,6 +423,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsi=

gned long new,

quoted hunk ↗ jump to hunk

 		  unsigned int size)
 {
 	switch (size) {
+	case 1:
+		return __cmpxchg_u8_relaxed(ptr, old, new);
+	case 2:
+		return __cmpxchg_u16_relaxed(ptr, old, new);
 	case 4:
 		return __cmpxchg_u32_relaxed(ptr, old, new);
 #ifdef CONFIG_PPC64

@@ -364,6 +443,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsi=

gned long new,

 		  unsigned int size)
 {
 	switch (size) {
+	case 1:
+		return __cmpxchg_u8_acquire(ptr, old, new);
+	case 2:
+		return __cmpxchg_u16_acquire(ptr, old, new);
 	case 4:
 		return __cmpxchg_u32_acquire(ptr, old, new);
 #ifdef CONFIG_PPC64
--=20
2.4.3
=20

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help