[PATCH v2 net-next 2/6] bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type
From: Alexei Starovoitov <hidden>
Date: 2016-08-31 21:50:56
Also in:
lkml
Subsystem:
bpf [general] (safe dynamic programs and tools), bpf [security & lsm] (security audit and enforcement using bpf), bpf [tracing], performance events subsystem, the rest, tracing · Maintainers:
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, KP Singh, Matt Bobrowski, Song Liu, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Linus Torvalds, Steven Rostedt, Masami Hiramatsu
Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to
HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE
correspondingly in uapi/linux/perf_event.h)
The program visible context meta structure is
struct bpf_perf_event_data {
struct pt_regs regs;
__u64 sample_period;
};
which is accessible directly from the program:
int bpf_prog(struct bpf_perf_event_data *ctx)
{
... ctx->sample_period ...
... ctx->regs.ip ...
}
The bpf verifier rewrites the accesses into kernel internal
struct bpf_perf_event_data_kern which allows changing
struct perf_sample_data without affecting bpf programs.
New fields can be added to the end of struct bpf_perf_event_data
in the future.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
include/linux/perf_event.h | 5 ++++
include/uapi/linux/Kbuild | 1 +
include/uapi/linux/bpf.h | 1 +
include/uapi/linux/bpf_perf_event.h | 18 +++++++++++
kernel/trace/bpf_trace.c | 60 +++++++++++++++++++++++++++++++++++++
5 files changed, 85 insertions(+)
create mode 100644 include/uapi/linux/bpf_perf_event.h
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b6b43cc0dd5..97bfe62f30d7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h@@ -788,6 +788,11 @@ struct perf_output_handle { int page; }; +struct bpf_perf_event_data_kern { + struct pt_regs *regs; + struct perf_sample_data *data; +}; + #ifdef CONFIG_CGROUP_PERF /*
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea2702f..d0352a971ebd 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild@@ -71,6 +71,7 @@ header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h header-y += bpf_common.h +header-y += bpf_perf_event.h header-y += bpf.h header-y += bpqether.h header-y += bsg.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e4c5a1baa993..f896dfac4ac0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h@@ -95,6 +95,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, }; #define BPF_PSEUDO_MAP_FD 1
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 000000000000..067427259820
--- /dev/null
+++ b/include/uapi/linux/bpf_perf_event.h@@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include <linux/types.h> +#include <linux/ptrace.h> + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ad35213b8405..0ac414abbf68 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c@@ -1,4 +1,5 @@ /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/bpf.h> +#include <linux/bpf_perf_event.h> #include <linux/filter.h> #include <linux/uaccess.h> #include <linux/ctype.h>
@@ -552,10 +554,68 @@ static struct bpf_prog_type_list tracepoint_tl = { .type = BPF_PROG_TYPE_TRACEPOINT, }; +static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) +{ + if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + if (off == offsetof(struct bpf_perf_event_data, sample_period)) { + if (size != sizeof(u64)) + return false; + } else { + if (size != sizeof(long)) + return false; + } + return true; +} + +static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); + switch (ctx_off) { + case offsetof(struct bpf_perf_event_data, sample_period): + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)), + dst_reg, src_reg, + offsetof(struct bpf_perf_event_data_kern, data)); + *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, + offsetof(struct perf_sample_data, period)); + break; + default: + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)), + dst_reg, src_reg, + offsetof(struct bpf_perf_event_data_kern, regs)); + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)), + dst_reg, dst_reg, ctx_off); + break; + } + return insn - insn_buf; +} + +static const struct bpf_verifier_ops perf_event_prog_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = pe_prog_is_valid_access, + .convert_ctx_access = pe_prog_convert_ctx_access, +}; + +static struct bpf_prog_type_list perf_event_tl = { + .ops = &perf_event_prog_ops, + .type = BPF_PROG_TYPE_PERF_EVENT, +}; + static int __init register_kprobe_prog_ops(void) { bpf_register_prog_type(&kprobe_tl); bpf_register_prog_type(&tracepoint_tl); + bpf_register_prog_type(&perf_event_tl); return 0; } late_initcall(register_kprobe_prog_ops);
--
2.8.0