Thread (25 messages) 25 messages, 5 authors, 2022-10-06

Re: [PATCH v3 1/6] powerpc/code-patching: Implement generic text patching function

From: Christophe Leroy <hidden>
Date: 2022-10-05 17:56:32

Hi,

Le 05/10/2022 à 07:32, Benjamin Gray a écrit :
Adds a generic text patching mechanism for patches the size of an int
or a long.

The patch_instruction function is reimplemented in terms of this
more generic function. This generic implementation allows patching of
arbitrary long data, such as pointers on 64-bit.

On 32-bit patch_uint is marked noinline to prevent a mis-optimisation.
Without noinline, inside patch_branch the compiler may inline all the
way to do_patch_memory, preventing the compiler from inlining
do_patch_memory into patch_uint. This would needlessly force patch_uint
to be a branch to do_patch_memory.
I'm on a business trip this week so I can't test it on hardware, but the
generated code looks horrid and sub-optimal, with a stack frame and so
many registers saved into it. That's mpc885_ads_defconfig built with GCC
12, without modules, without stackprotector, and with 4k pages.

00000168 <__patch_memory.constprop.0>:
  168:	90 83 00 00 	stw     r4,0(r3)
  16c:	7c 00 18 6c 	dcbst   0,r3
  170:	7c 00 04 ac 	hwsync
  174:	7c 00 2f ac 	icbi    0,r5
  178:	7c 00 04 ac 	hwsync
  17c:	4c 00 01 2c 	isync
  180:	38 60 00 00 	li      r3,0
  184:	4e 80 00 20 	blr
  188:	38 60 ff ff 	li      r3,-1
  18c:	4e 80 00 20 	blr

00000190 <raw_patch_instruction>:
  190:	90 83 00 00 	stw     r4,0(r3)
  194:	7c 00 18 6c 	dcbst   0,r3
  198:	7c 00 04 ac 	hwsync
  19c:	7c 00 1f ac 	icbi    0,r3
  1a0:	7c 00 04 ac 	hwsync
  1a4:	4c 00 01 2c 	isync
  1a8:	38 60 00 00 	li      r3,0
  1ac:	4e 80 00 20 	blr
  1b0:	38 60 ff ff 	li      r3,-1
  1b4:	4e 80 00 20 	blr

000001b8 <patch_uint>:
  1b8:	7c 65 1b 78 	mr      r5,r3
  1bc:	48 00 00 a4 	b       260 <patch_uint+0xa8>
  1c0:	94 21 ff e0 	stwu    r1,-32(r1)
  1c4:	7c 08 02 a6 	mflr    r0
  1c8:	90 01 00 24 	stw     r0,36(r1)
  1cc:	93 81 00 10 	stw     r28,16(r1)
  1d0:	93 a1 00 14 	stw     r29,20(r1)
  1d4:	93 c1 00 18 	stw     r30,24(r1)
  1d8:	93 e1 00 1c 	stw     r31,28(r1)
  1dc:	7f 80 00 a6 	mfmsr   r28
  1e0:	7c 51 13 a6 	mtspr   81,r2
  1e4:	3d 20 00 00 	lis     r9,0
			1e6: R_PPC_ADDR16_HA	.data
  1e8:	81 49 00 00 	lwz     r10,0(r9)
			1ea: R_PPC_ADDR16_LO	.data
  1ec:	3d 20 00 00 	lis     r9,0
			1ee: R_PPC_ADDR16_HA	init_mm+0x24
  1f0:	83 ea 00 04 	lwz     r31,4(r10)
  1f4:	80 e9 00 00 	lwz     r7,0(r9)
			1f6: R_PPC_ADDR16_LO	init_mm+0x24
  1f8:	57 e8 65 3a 	rlwinm  r8,r31,12,20,29
  1fc:	7f a7 40 2e 	lwzx    r29,r7,r8
  200:	7c 69 1b 78 	mr      r9,r3
  204:	3d 29 40 00 	addis   r9,r9,16384
  208:	57 fe b5 3a 	rlwinm  r30,r31,22,20,29
  20c:	55 29 00 26 	clrrwi  r9,r9,12
  210:	61 29 01 25 	ori     r9,r9,293
  214:	57 bd 00 26 	clrrwi  r29,r29,12
  218:	3f de c0 00 	addis   r30,r30,-16384
  21c:	7d 3d f1 2e 	stwx    r9,r29,r30
  220:	53 e3 00 26 	rlwimi  r3,r31,0,0,19
  224:	4b ff ff 45 	bl      168 <__patch_memory.constprop.0>
  228:	39 20 00 00 	li      r9,0
  22c:	7d 3d f1 2e 	stwx    r9,r29,r30
  230:	57 ff 00 26 	clrrwi  r31,r31,12
  234:	7c 00 fa 64 	tlbie   r31,r0
  238:	7c 00 04 ac 	hwsync
  23c:	7f 80 01 24 	mtmsr   r28
  240:	80 01 00 24 	lwz     r0,36(r1)
  244:	83 81 00 10 	lwz     r28,16(r1)
  248:	83 a1 00 14 	lwz     r29,20(r1)
  24c:	83 c1 00 18 	lwz     r30,24(r1)
  250:	83 e1 00 1c 	lwz     r31,28(r1)
  254:	7c 08 03 a6 	mtlr    r0
  258:	38 21 00 20 	addi    r1,r1,32
  25c:	4e 80 00 20 	blr
  260:	4b ff ff 08 	b       168 <__patch_memory.constprop.0>


Christophe

quoted hunk ↗ jump to hunk
Signed-off-by: Benjamin Gray <redacted>
---
  arch/powerpc/include/asm/code-patching.h | 29 ++++++++++
  arch/powerpc/lib/code-patching.c         | 73 ++++++++++++++++++------
  2 files changed, 85 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 3f881548fb61..170bfa848c7c 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
  int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
  		       unsigned long target, int flags);
  int patch_branch(u32 *addr, unsigned long target, int flags);
+
+/* patch_uint and patch_ulong must only be called on addresses where the patch
+ * does not cross a cacheline, otherwise it may not be flushed properly and
+ * mixes of new and stale data may be observed.
+ *
+ * patch_instruction and other instruction patchers automatically satisfy this
+ * requirement due to instruction alignment requirements.
+ */
+
+int patch_uint(void *addr, unsigned int val);
+
+#ifdef CONFIG_PPC64
+
+int patch_ulong(void *addr, unsigned long val);
  int patch_instruction(u32 *addr, ppc_inst_t instr);
+
+#else
+
+static inline int patch_ulong(void *addr, unsigned long val)
+{
+	return patch_uint(addr, val);
+}
+
+static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	return patch_uint(addr, ppc_inst_val(instr));
+}
+
+#endif
+
  int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
  
  static inline unsigned long patch_site_addr(s32 *site)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 125c55e3e148..ecdd2e523d9a 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,20 +15,24 @@
  #include <asm/code-patching.h>
  #include <asm/inst.h>
  
-static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
+static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
+			   bool is_dword)
  {
-	if (!ppc_inst_prefixed(instr)) {
-		u32 val = ppc_inst_val(instr);
-
-		__put_kernel_nofault(patch_addr, &val, u32, failed);
-	} else {
-		u64 val = ppc_inst_as_ulong(instr);
+	/* Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
  
+	if (unlikely(is_dword))
  		__put_kernel_nofault(patch_addr, &val, u64, failed);
-	}
+	else
+		__put_kernel_nofault(patch_addr, &val, u32, failed);
  
-	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
-							    "r" (exec_addr));
+	/* Assume data is inside a single cacheline */
+	dcbst(patch_addr);
+	mb(); /* sync */
+	/* Flush on the EA that may be executed in case of a non-coherent icache */
+	icbi(exec_addr);
+	mb(); /* sync */
+	isync();
  
  	return 0;
  
@@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr
  
  int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
  {
-	return __patch_instruction(addr, instr, addr);
+	if (ppc_inst_prefixed(instr))
+		return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, true);
+	else
+		return __patch_memory(addr, ppc_inst_val(instr), addr, false);
  }
  
  static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr)
  	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
  }
  
-static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+static int __do_patch_memory(void *addr, unsigned long val, bool is_dword)
  {
  	int err;
  	u32 *patch_addr;
@@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
  	if (radix_enabled())
  		asm volatile("ptesync": : :"memory");
  
-	err = __patch_instruction(addr, instr, patch_addr);
+	err = __patch_memory(patch_addr, val, addr, is_dword);
  
  	pte_clear(&init_mm, text_poke_addr, pte);
  	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
@@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
  	return err;
  }
  
-int patch_instruction(u32 *addr, ppc_inst_t instr)
+static int do_patch_memory(void *addr, unsigned long val, bool is_dword)
  {
  	int err;
  	unsigned long flags;
@@ -186,15 +193,47 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
  	 */
  	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
  	    !static_branch_likely(&poking_init_done))
-		return raw_patch_instruction(addr, instr);
+		return __patch_memory(addr, val, addr, is_dword);
  
  	local_irq_save(flags);
-	err = __do_patch_instruction(addr, instr);
+	err = __do_patch_memory(addr, val, is_dword);
  	local_irq_restore(flags);
  
  	return err;
  }
-NOKPROBE_SYMBOL(patch_instruction);
+
+#ifdef CONFIG_PPC64
+
+int patch_uint(void *addr, unsigned int val)
+{
+	return do_patch_memory(addr, val, false);
+}
+NOKPROBE_SYMBOL(patch_uint)
+
+int patch_ulong(void *addr, unsigned long val)
+{
+	return do_patch_memory(addr, val, true);
+}
+NOKPROBE_SYMBOL(patch_ulong)
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	if (ppc_inst_prefixed(instr))
+		return patch_ulong(addr, ppc_inst_as_ulong(instr));
+	else
+		return patch_uint(addr, ppc_inst_val(instr));
+}
+NOKPROBE_SYMBOL(patch_instruction)
+
+#else
+
+noinline int patch_uint(void *addr, unsigned int val)
+{
+	return do_patch_memory(addr, val, false);
+}
+NOKPROBE_SYMBOL(patch_uint)
+
+#endif
  
  int patch_branch(u32 *addr, unsigned long target, int flags)
  {
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help