Re: [PATCH v2 1/6] powerpc/code-patching: Implement generic text patching function
From: Christophe Leroy <hidden>
Date: 2022-09-27 07:32:02
Le 26/09/2022 à 08:43, Benjamin Gray a écrit :
quoted hunk ↗ jump to hunk
Adds a generic text patching mechanism for patches of 1, 2, 4, or (64-bit) 8 bytes. The patcher conditionally syncs the icache depending on if the content will be executed (as opposed to, e.g., read-only data). The `patch_instruction` function is reimplemented in terms of this more generic function. This generic implementation allows patching of arbitrary 64-bit data, whereas the original `patch_instruction` decided the size based on the 'instruction' opcode, so was not suitable for arbitrary data. Signed-off-by: Benjamin Gray <redacted> --- arch/powerpc/include/asm/code-patching.h | 7 ++ arch/powerpc/lib/code-patching.c | 90 +++++++++++++++++------- 2 files changed, 71 insertions(+), 26 deletions(-)diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 1c6316ec4b74..15efd8ab22da 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h@@ -76,6 +76,13 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, int patch_branch(u32 *addr, unsigned long target, int flags); int patch_instruction(u32 *addr, ppc_inst_t instr); int raw_patch_instruction(u32 *addr, ppc_inst_t instr); +int __patch_memory(void *dest, unsigned long src, size_t size); + +#define patch_memory(addr, val) \ +({ \ + BUILD_BUG_ON(!__native_word(val)); \ + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ +}) static inline unsigned long patch_site_addr(s32 *site) {diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index ad0cf3108dd0..9979380d55ef 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c@@ -15,20 +15,47 @@ #include <asm/code-patching.h> #include <asm/inst.h> -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) +static int __always_inline ___patch_memory(void *patch_addr, + unsigned long data, + void *prog_addr, + size_t size) { - if (!ppc_inst_prefixed(instr)) { - u32 val = ppc_inst_val(instr); + switch (size) { + case 1: + __put_kernel_nofault(patch_addr, &data, u8, failed); + break; + case 2: + __put_kernel_nofault(patch_addr, &data, u16, failed); + break; + case 4: + __put_kernel_nofault(patch_addr, &data, u32, failed); + break; +#ifdef CONFIG_PPC64 + case 8: + __put_kernel_nofault(patch_addr, &data, u64, failed); + break; +#endif + default: + unreachable(); + } - __put_kernel_nofault(patch_addr, &val, u32, failed); - } else { - u64 val = ppc_inst_as_ulong(instr); + dcbst(patch_addr); + dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */
Would it be possible to minimise the above ?
What is patch_memory used for ? It is not meant to patch raw data, but
things like code text or code data. The only realistic case I see where
we could have a cache split is 8 bytes data. And even that, we should be
able to take care of it.
So, do we need that double dcbst at all ? Or at least do we need it for
the 1,2,4 cases ?
I was wrongly expecting it to be a single additional instruction, but in
fact it is a few more:
In raw_patch_instruction() it remains reasonable:
4: 7c 00 18 6c dcbst 0,r3
8: 39 23 00 03 addi r9,r3,3
c: 7c 00 48 6c dcbst 0,r9
But in patch_memory it is one more:
80: 7c 00 18 6c dcbst 0,r3
84: 38 a5 ff ff addi r5,r5,-1
88: 7c a3 2a 14 add r5,r3,r5
8c: 7c 00 28 6c dcbst 0,r5
quoted hunk ↗ jump to hunk
- __put_kernel_nofault(patch_addr, &val, u64, failed); - } + mb(); /* sync */ + + /* Flush on the EA that may be executed in case of a non-coherent icache */ + icbi(prog_addr); + + /* Also flush the last byte of the instruction if it may be a + * prefixed instruction and we aren't assuming minimum 64-byte + * cacheline sizes + */ + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) + icbi(prog_addr + size - 1); - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), - "r" (exec_addr)); + mb(); /* sync */ + isync(); return 0;@@ -38,7 +65,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { - return __patch_instruction(addr, instr, addr); + if (ppc_inst_prefixed(instr)) + return ___patch_memory(addr, ppc_inst_as_ulong(instr), addr, sizeof(u64)); + else + return ___patch_memory(addr, ppc_inst_val(instr), addr, sizeof(u32)); } #ifdef CONFIG_STRICT_KERNEL_RWX@@ -147,24 +177,22 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int __always_inline __do_patch_memory(void *dest, unsigned long src, size_t size) { int err; u32 *patch_addr; - unsigned long text_poke_addr; pte_t *pte; - unsigned long pfn = get_patch_pfn(addr); - - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; - patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; + unsigned long pfn = get_patch_pfn(dest); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest)); pte = virt_to_kpte(text_poke_addr); __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); - err = __patch_instruction(addr, instr, patch_addr); + err = ___patch_memory(patch_addr, src, dest, size); pte_clear(&init_mm, text_poke_addr, pte); flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);@@ -172,7 +200,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int __always_inline do_patch_memory(void *dest, unsigned long src, size_t size) { int err; unsigned long flags;@@ -183,32 +211,42 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) * to allow patching. We just do the plain old patching */ if (!static_branch_likely(&poking_init_done)) - return raw_patch_instruction(addr, instr); + return ___patch_memory(dest, src, dest, size); local_irq_save(flags); - err = __do_patch_instruction(addr, instr); + err = __do_patch_memory(dest, src, size); local_irq_restore(flags); return err; } + #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int do_patch_memory(void *dest, unsigned long src, size_t size) { - return raw_patch_instruction(addr, instr); + return ___patch_memory(dest, src, dest, size); } #endif /* CONFIG_STRICT_KERNEL_RWX */ __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); -int patch_instruction(u32 *addr, ppc_inst_t instr) +int __patch_memory(void *dest, unsigned long src, size_t size) { /* Make sure we aren't patching a freed init section */ - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) + if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4)) return 0; - return do_patch_instruction(addr, instr); + return do_patch_memory(dest, src, size); +} +NOKPROBE_SYMBOL(__patch_memory); + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return patch_memory(addr, ppc_inst_as_ulong(instr)); + else + return patch_memory(addr, ppc_inst_val(instr)); } NOKPROBE_SYMBOL(patch_instruction); --2.37.3