Inter-revision diff: patch 2

Comparing v4 (message) to rfc (message)

--- v4
+++ vrfc
@@ -1,58 +1,483 @@
-Kprobe placed on the  kretprobe_trampoline during boot time can be 
-optimized, since the instruction at probe point is a 'nop'.
+Instruction slot for detour buffer is allocated from
+the reserved area. For the time being 64KB is reserved
+in memory for this purpose. ppc_get_optinsn_slot() and
+ppc_free_optinsn_slot() are geared towards the allocation and freeing
+of memory from this area.
 
-Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
-Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
 ---
- arch/powerpc/kernel/kprobes.c   | 8 ++++++++
- arch/powerpc/kernel/optprobes.c | 7 +++----
- 2 files changed, 11 insertions(+), 4 deletions(-)
+ arch/powerpc/kernel/optprobes.c | 463 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 463 insertions(+)
+ create mode 100644 arch/powerpc/kernel/optprobes.c
 
-diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
-index e785cc9..5b0fd07 100644
---- a/arch/powerpc/kernel/kprobes.c
-+++ b/arch/powerpc/kernel/kprobes.c
-@@ -282,6 +282,7 @@ asm(".global kretprobe_trampoline\n"
- 	".type kretprobe_trampoline, @function\n"
- 	"kretprobe_trampoline:\n"
- 	"nop\n"
-+	"blr\n"
- 	".size kretprobe_trampoline, .-kretprobe_trampoline\n");
- 
- /*
-@@ -334,6 +335,13 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
- 
- 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
- 	regs->nip = orig_ret_address;
-+	/*
-+	 * Make LR point to the orig_ret_address.
-+	 * When the 'nop' inside the kretprobe_trampoline
-+	 * is optimized, we can do a 'blr' after executing the
-+	 * detour buffer code.
-+	 */
-+	regs->link = orig_ret_address;
- 
- 	reset_current_kprobe();
- 	kretprobe_hash_unlock(current, &flags);
 diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
-index ecba221..5e4c254 100644
---- a/arch/powerpc/kernel/optprobes.c
+new file mode 100644
+index 0000000..50a60c1
+--- /dev/null
 +++ b/arch/powerpc/kernel/optprobes.c
-@@ -72,12 +72,11 @@ static unsigned long can_optimize(struct kprobe *p)
- 
- 	/*
- 	 * kprobe placed for kretprobe during boot time
--	 * is not optimizing now.
--	 *
--	 * TODO: Optimize kprobe in kretprobe_trampoline
-+	 * has a 'nop' instruction, which can be emulated.
-+	 * So further checks can be skipped.
- 	 */
- 	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
--		return 0;
-+		return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
- 
- 	/*
- 	 * We only support optimizing kernel addresses, but not
+@@ -0,0 +1,463 @@
++/*
++ * Code for Kernel probes Jump optimization.
++ *
++ * Copyright 2016, Anju T, IBM Corp.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <linux/kprobes.h>
++#include <linux/jump_label.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <asm/kprobes.h>
++#include <asm/ptrace.h>
++#include <asm/cacheflush.h>
++#include <asm/code-patching.h>
++#include <asm/sstep.h>
++
++/* Reserve an area to allocate slots for detour buffer */
++extern void  optprobe_trampoline_holder(void)
++{
++	asm volatile(".global optinsn_slot\n"
++			"optinsn_slot:\n"
++			".space 65536");
++}
++
++#define SLOT_SIZE 65536
++#define TMPL_CALL_HDLR_IDX	\
++	(optprobe_template_call_handler - optprobe_template_entry)
++#define TMPL_EMULATE_IDX	\
++	(optprobe_template_call_emulate - optprobe_template_entry)
++#define TMPL_RET_BRANCH_IDX	\
++	(optprobe_template_ret_branch - optprobe_template_entry)
++#define TMPL_RET_IDX	\
++	(optprobe_template_ret - optprobe_template_entry)
++#define TMPL_OP1_IDX	\
++	(optprobe_template_op_address1 - optprobe_template_entry)
++#define TMPL_OP2_IDX	\
++	(optprobe_template_op_address2 - optprobe_template_entry)
++#define TMPL_INSN_IDX	\
++	(optprobe_template_insn - optprobe_template_entry)
++#define TMPL_END_IDX	\
++	(optprobe_template_end - optprobe_template_entry)
++
++struct kprobe_ppc_insn_page {
++	struct list_head list;
++	kprobe_opcode_t *insns;	/* Page of instruction slots */
++	struct kprobe_insn_cache *cache;
++	int nused;
++	int ngarbage;
++	char slot_used[];
++};
++
++#define PPC_KPROBE_INSN_PAGE_SIZE(slots)	\
++	(offsetof(struct kprobe_ppc_insn_page, slot_used) +	\
++	(sizeof(char) * (slots)))
++
++enum ppc_kprobe_slot_state {
++	SLOT_CLEAN = 0,
++	SLOT_DIRTY = 1,
++	SLOT_USED = 2,
++};
++
++static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
++	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
++	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
++	/* .insn_size is initialized later */
++	.nr_garbage = 0,
++};
++
++static int ppc_slots_per_page(struct kprobe_insn_cache *c)
++{
++	/*
++	 * Here the #slots per page differs from x86 as we have
++	 * only 64KB reserved.
++	 */
++	return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
++}
++
++/* Return 1 if all garbages are collected, otherwise 0. */
++static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
++{
++	kip->slot_used[idx] = SLOT_CLEAN;
++	kip->nused--;
++	return 0;
++}
++
++static int collect_garbage_slots(struct kprobe_insn_cache *c)
++{
++	struct kprobe_ppc_insn_page *kip, *next;
++
++	/* Ensure no-one is interrupted on the garbages */
++	synchronize_sched();
++
++	list_for_each_entry_safe(kip, next, &c->pages, list) {
++		int i;
++
++		if (kip->ngarbage == 0)
++			continue;
++		kip->ngarbage = 0;	/* we will collect all garbages */
++		for (i = 0; i < ppc_slots_per_page(c); i++) {
++			if (kip->slot_used[i] == SLOT_DIRTY &&
++			    collect_one_slot(kip, i))
++				break;
++		}
++	}
++	c->nr_garbage = 0;
++	return 0;
++}
++
++kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
++{
++	struct kprobe_ppc_insn_page *kip;
++	kprobe_opcode_t *slot = NULL;
++
++	mutex_lock(&c->mutex);
++	list_for_each_entry(kip, &c->pages, list) {
++		if (kip->nused < ppc_slots_per_page(c)) {
++			int i;
++
++			for (i = 0; i < ppc_slots_per_page(c); i++) {
++				if (kip->slot_used[i] == SLOT_CLEAN) {
++					kip->slot_used[i] = SLOT_USED;
++					kip->nused++;
++					slot = kip->insns + (i * c->insn_size);
++					goto out;
++				}
++			}
++			/* kip->nused reached max value. */
++			kip->nused = ppc_slots_per_page(c);
++			pr_info("No more slots to allocate\n");
++			WARN_ON(1);
++			goto out;
++		}
++	}
++	kip = kmalloc(PPC_KPROBE_INSN_PAGE_SIZE(ppc_slots_per_page(c)),
++		      GFP_KERNEL);
++	if (!kip)
++		goto out;
++	/*
++	 * Allocate from the reserved area so as to
++	 * ensure the range will be within +/-32MB
++	 */
++	kip->insns = &optinsn_slot;
++	if (!kip->insns) {
++		kfree(kip);
++		goto out;
++	}
++	INIT_LIST_HEAD(&kip->list);
++	memset(kip->slot_used, SLOT_CLEAN, ppc_slots_per_page(c));
++	kip->slot_used[0] = SLOT_USED;
++	kip->nused = 1;
++	kip->ngarbage = 0;
++	kip->cache = c;
++	list_add(&kip->list, &c->pages);
++	slot = kip->insns;
++out:
++	mutex_unlock(&c->mutex);
++	return slot;
++}
++
++kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
++{
++	/*
++	 * The insn slot is allocated from the reserved
++	 * area(ie &optinsn_slot).We are not optimizing probes
++	 * at module_addr now.
++	 */
++	kprobe_opcode_t *slot = NULL;
++
++	if (is_kernel_addr(op->kp.addr))
++		slot = __ppc_get_optinsn_slot(&kprobe_ppc_optinsn_slots);
++	else
++		pr_info("Kprobe can not be optimized\n");
++	return slot;
++}
++NOKPROBE_SYMBOL(ppc_get_optinsn_slot);
++
++void __ppc_free_optinsn_slot(struct kprobe_insn_cache *c,
++			     kprobe_opcode_t *slot, int dirty)
++{
++	struct kprobe_ppc_insn_page *kip;
++
++	mutex_lock(&c->mutex);
++
++	list_for_each_entry(kip, &c->pages, list) {
++		long idx = ((long)slot - (long)kip->insns) /
++				(c->insn_size * sizeof(kprobe_opcode_t));
++		if (idx >= 0 && idx < ppc_slots_per_page(c)) {
++			WARN_ON(kip->slot_used[idx] != SLOT_USED);
++			if (dirty) {
++				kip->slot_used[idx] = SLOT_DIRTY;
++				kip->ngarbage++;
++				if (++c->nr_garbage > ppc_slots_per_page(c))
++					collect_garbage_slots(c);
++			} else
++				collect_one_slot(kip, idx);
++			goto out;
++		}
++	}
++	/* Could not free this slot. */
++	WARN_ON(1);
++out:
++	mutex_unlock(&c->mutex);
++}
++
++static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
++{
++	if (!op->optinsn.insn)
++		return;
++	if (is_kernel_addr((unsigned long)op->kp.addr))
++		__ppc_free_optinsn_slot(&kprobe_ppc_optinsn_slots,
++					op->optinsn.insn, 0);
++}
++NOKPROBE_SYMBOL(ppc_free_optinsn_slot);
++
++static void
++__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
++{
++	if (op->optinsn.insn) {
++		ppc_free_optinsn_slot(op);
++		op->optinsn.insn = NULL;
++	}
++}
++
++static int can_optimize(struct kprobe *p)
++{
++	/*
++	 * Not optimizing the kprobe placed by
++	 * kretprobe during boot time
++	 */
++	struct pt_regs *regs;
++	unsigned int instr;
++	int r;
++
++	if ((kprobe_opcode_t)p->addr == (kprobe_opcode_t)&kretprobe_trampoline)
++		return 0;
++
++	regs = current_pt_regs();
++	instr = *(p->ainsn.insn);
++
++	/* Ensure the instruction can be emulated*/
++	r = emulate_step(regs, instr);
++	if (r != 1)
++		return 0;
++
++	return 1;
++}
++
++static void
++create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
++{
++	/*
++	 * Create a branch back to the return address
++	 * after the probed instruction is emulated
++	 */
++
++	kprobe_opcode_t branch, *buff;
++	unsigned long ret;
++
++	ret = regs->nip;
++	buff = op->optinsn.insn;
++
++	branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
++			       (unsigned long)ret, 0);
++	buff[TMPL_RET_IDX] = branch;
++}
++NOKPROBE_SYMBOL(create_return_branch);
++
++static void
++optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
++{
++	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
++	unsigned long flags;
++
++	local_irq_save(flags);
++
++	if (kprobe_running())
++		kprobes_inc_nmissed_count(&op->kp);
++	else {
++		__this_cpu_write(current_kprobe, &op->kp);
++		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
++		opt_pre_handler(&op->kp, regs);
++		__this_cpu_write(current_kprobe, NULL);
++	}
++	local_irq_restore(flags);
++}
++
++void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
++{
++	 __arch_remove_optimized_kprobe(op, 1);
++}
++
++void  create_insn(unsigned int insn, kprobe_opcode_t *addr)
++{
++	unsigned int instr, instr2;
++
++	instr = 0x3c000000 | 0x800000 | ((insn >> 16) & 0xffff);
++	*addr++ = instr;
++	instr2 = 0x60000000 | 0x40000 | 0x800000;
++	instr2 = instr2 | (insn & 0xffff);
++	*addr = instr2;
++}
++
++void create_load_address_insn(struct optimized_kprobe *op,
++			      kprobe_opcode_t *addr, kprobe_opcode_t *addr2)
++{
++	unsigned int instr1, instr2, instr3, instr4, instr5;
++	unsigned long val;
++
++	val = op;
++
++	instr1 = 0x3c000000 | 0x600000 | ((val >> 48) & 0xffff);
++	*addr++ = instr1;
++	*addr2++ = instr1;
++	instr2 = 0x60000000 | 0x30000 | 0x600000 | ((val >> 32) & 0xffff);
++	*addr++ = instr2;
++	*addr2++ = instr2;
++	instr3 = 0x78000004 | 0x30000 | 0x600000 | ((32 & 0x1f) << 11);
++	instr3 = instr3 | ((31 & 0x1f) << 6) | ((32 & 0x20) >> 4);
++	*addr++ = instr3;
++	*addr2++ = instr3;
++	instr4 = 0x64000000 |  0x30000 | 0x600000 | ((val >> 16) & 0xffff);
++	*addr++ = instr4;
++	*addr2++ = instr4;
++	instr5 =  0x60000000 | 0x30000 | 0x600000 | (val & 0xffff);
++	*addr = instr5;
++	*addr2 = instr5;
++}
++
++int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
++{
++	kprobe_opcode_t *buff, branch, branch2, branch3;
++	long rel_chk, ret_chk;
++
++	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
++	op->optinsn.insn = NULL;
++
++	if (!can_optimize(p))
++		return -EILSEQ;
++
++	/* Allocate instruction slot for detour buffer*/
++	buff = ppc_get_optinsn_slot(op);
++
++	/*
++	 * OPTPROBE use a 'b' instruction to branch to optinsn.insn.
++	 *
++	 * The target address has to be relatively nearby, to permit use
++	 * of branch instruction in powerpc because the address is specified
++	 * in an immediate field in the instruction opcode itself, ie 24 bits
++	 * in the opcode specify the address. Therefore the address gap should
++	 * be 32MB on either side of the current instruction.
++	 */
++	rel_chk = (long)buff -
++			(unsigned long)p->addr;
++	if (rel_chk < -0x2000000 || rel_chk > 0x1fffffc || rel_chk & 0x3) {
++		ppc_free_optinsn_slot(op);
++		return -ERANGE;
++	}
++	/*
++	 * For the time being assume that the return address is NIP+4.
++	 * TODO : check the return address is also within 32MB range for
++	 * cases where NIP is not NIP+4.ie the NIP is decided after emulation.
++	 */
++	ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)(p->addr) + 4;
++
++	if (ret_chk < -0x2000000 || ret_chk > 0x1fffffc || ret_chk & 0x3) {
++		ppc_free_optinsn_slot(op);
++		return -ERANGE;
++	}
++
++	/* Do Copy arch specific instance from template*/
++	memcpy(buff, optprobe_template_entry,
++	       TMPL_END_IDX * sizeof(kprobe_opcode_t));
++	create_load_address_insn(op, buff + TMPL_OP1_IDX, buff + TMPL_OP2_IDX);
++
++	/* Create a branch to the optimized_callback function */
++	branch = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
++			       (unsigned long)optimized_callback + 8,
++				BRANCH_SET_LINK);
++
++	/* Place the branch instr into the trampoline */
++	buff[TMPL_CALL_HDLR_IDX] = branch;
++	create_insn(*(p->ainsn.insn), buff + TMPL_INSN_IDX);
++
++	/*Create a branch instruction into the emulate_step*/
++	branch3 = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
++				(unsigned long)emulate_step + 8,
++				BRANCH_SET_LINK);
++	buff[TMPL_EMULATE_IDX] = branch3;
++
++	/* Create a branch for jumping back*/
++	branch2 = create_branch((unsigned int *)buff + TMPL_RET_BRANCH_IDX,
++				(unsigned long)create_return_branch + 8,
++				BRANCH_SET_LINK);
++	buff[TMPL_RET_BRANCH_IDX] = branch2;
++
++	op->optinsn.insn = buff;
++	return 0;
++}
++
++int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
++{
++	return optinsn->insn;
++}
++
++/*
++ * Here,kprobe opt always replace one instruction (4 bytes
++ * aligned and 4 bytes long). It is impossible to encounter another
++ * kprobe in the address range. So always return 0.
++ */
++int arch_check_optimized_kprobe(struct optimized_kprobe *op)
++{
++	return 0;
++}
++
++void arch_optimize_kprobes(struct list_head *oplist)
++{
++	struct optimized_kprobe *op;
++	struct optimized_kprobe *tmp;
++
++	unsigned int branch;
++
++	list_for_each_entry_safe(op, tmp, oplist, list) {
++	/*
++	* Backup instructions which will be replaced
++	* by jump address
++	*/
++	memcpy(op->optinsn.copied_insn, op->kp.addr,
++	       RELATIVEJUMP_SIZE);
++	branch = create_branch((unsigned int *)op->kp.addr,
++			       (unsigned long)op->optinsn.insn, 0);
++	*op->kp.addr = branch;
++	list_del_init(&op->list);
++	}
++}
++
++void arch_unoptimize_kprobe(struct optimized_kprobe *op)
++{
++	arch_arm_kprobe(&op->kp);
++}
++
++void arch_unoptimize_kprobes(struct list_head *oplist,
++			     struct list_head *done_list)
++{
++	struct optimized_kprobe *op;
++	struct optimized_kprobe *tmp;
++
++	list_for_each_entry_safe(op, tmp, oplist, list) {
++		arch_unoptimize_kprobe(op);
++		list_move(&op->list, done_list);
++	}
++}
++
++int arch_within_optimized_kprobe(struct optimized_kprobe *op,
++				 unsigned long addr)
++{
++	return 0;
++}
 -- 
-2.7.4
+2.1.0
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help