Thread (13 messages) 13 messages, 6 authors, 2020-12-23

Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC

From: Boqun Feng <hidden>
Date: 2020-11-13 15:30:50
Also in: linux-arch, lkml

Hi Nicholas,

On Wed, Nov 11, 2020 at 09:07:23PM +1000, Nicholas Piggin wrote:
quoted hunk ↗ jump to hunk
All the cool kids are doing it.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/atomic.h  | 681 ++++++++++-------------------
 arch/powerpc/include/asm/cmpxchg.h |  62 +--
 2 files changed, 248 insertions(+), 495 deletions(-)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 8a55eb8cc97b..899aa2403ba7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -11,185 +11,285 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
+#define ARCH_ATOMIC
+
+#ifndef CONFIG_64BIT
+#include <asm-generic/atomic64.h>
+#endif
+
 /*
  * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
  * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
  * on the platform without lwsync.
  */
 #define __atomic_acquire_fence()					\
-	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
+	asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
 
 #define __atomic_release_fence()					\
-	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
+	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
 
-static __inline__ int atomic_read(const atomic_t *v)
-{
-	int t;
+#define __atomic_pre_full_fence		smp_mb
 
-	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+#define __atomic_post_full_fence	smp_mb
 
Do you need to define __atomic_{pre,post}_full_fence for PPC? IIRC, they
are default smp_mb__{before,atomic}_atomic(), so are smp_mb() defautly
on PPC.
-	return t;
+#define arch_atomic_read(v)			__READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
+#ifdef CONFIG_64BIT
+#define ATOMIC64_INIT(i)			{ (i) }
+#define arch_atomic64_read(v)			__READ_ONCE((v)->counter)
+#define arch_atomic64_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
+#endif
+
[...]
 
+#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
+static inline int arch_##name##_relaxed(type *v, dtype a, dtype u)	\
I don't think we have atomic_fetch_*_unless_relaxed() at atomic APIs,
ditto for:

	atomic_fetch_add_unless_relaxed()
	atomic_inc_not_zero_relaxed()
	atomic_dec_if_positive_relaxed()

, and we don't have the _acquire() and _release() variants for them
either, and if you don't define their fully-ordered version (e.g.
atomic_inc_not_zero()), atomic-arch-fallback.h will use read and cmpxchg
to implement them, and I think not what we want.

[...]
quoted hunk ↗ jump to hunk
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..181f7e8b3281 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
      		(unsigned long)_x_, sizeof(*(ptr))); 			     \
   })
 
-#define xchg_relaxed(ptr, x)						\
+#define arch_xchg_relaxed(ptr, x)					\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
@@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
 	return old;
 }
 
-static __always_inline unsigned long
-__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
-		  unsigned int size)
-{
-	switch (size) {
-	case 1:
-		return __cmpxchg_u8_acquire(ptr, old, new);
-	case 2:
-		return __cmpxchg_u16_acquire(ptr, old, new);
-	case 4:
-		return __cmpxchg_u32_acquire(ptr, old, new);
-#ifdef CONFIG_PPC64
-	case 8:
-		return __cmpxchg_u64_acquire(ptr, old, new);
-#endif
-	}
-	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
-	return old;
-}
-#define cmpxchg(ptr, o, n)						 \
-  ({									 \
-     __typeof__(*(ptr)) _o_ = (o);					 \
-     __typeof__(*(ptr)) _n_ = (n);					 \
-     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
-				    (unsigned long)_n_, sizeof(*(ptr))); \
-  })
-
-
If you remove {atomic_}_cmpxchg_{,_acquire}() and use the version
provided by atomic-arch-fallback.h, then a fail cmpxchg or
cmpxchg_acquire() will still result into a full barrier or a acquire
barrier after the RMW operation, the barrier is not necessary and
probably this is not what we want?

Regards,
Boqun
quoted hunk ↗ jump to hunk
-#define cmpxchg_local(ptr, o, n)					 \
+#define arch_cmpxchg_local(ptr, o, n)					 \
   ({									 \
      __typeof__(*(ptr)) _o_ = (o);					 \
      __typeof__(*(ptr)) _n_ = (n);					 \
@@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 				    (unsigned long)_n_, sizeof(*(ptr))); \
   })
 
-#define cmpxchg_relaxed(ptr, o, n)					\
+#define arch_cmpxchg_relaxed(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 			sizeof(*(ptr)));				\
 })
 
-#define cmpxchg_acquire(ptr, o, n)					\
-({									\
-	__typeof__(*(ptr)) _o_ = (o);					\
-	__typeof__(*(ptr)) _n_ = (n);					\
-	(__typeof__(*(ptr))) __cmpxchg_acquire((ptr),			\
-			(unsigned long)_o_, (unsigned long)_n_,		\
-			sizeof(*(ptr)));				\
-})
 #ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n)						\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
-  })
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	arch_cmpxchg_local((ptr), (o), (n));				\
   })
-#define cmpxchg64_relaxed(ptr, o, n)					\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_relaxed((ptr), (o), (n));				\
-})
-#define cmpxchg64_acquire(ptr, o, n)					\
+#define arch_cmpxchg64_relaxed(ptr, o, n)				\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_acquire((ptr), (o), (n));				\
+	arch_cmpxchg_relaxed((ptr), (o), (n));				\
 })
 #else
 #include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 #endif
 
 #endif /* __KERNEL__ */
-- 
2.23.0
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help