Thread (17 messages) 17 messages, 2 authors, 2018-12-31

Re: [RFC bpf-next v3 03/12] bpf: verifier: remove dead code

From: Yonghong Song <hidden>
Date: 2018-12-31 21:41:41


On 12/31/18 12:31 PM, Jakub Kicinski wrote:
On Sun, 30 Dec 2018 22:02:10 +0000, Yonghong Song wrote:
quoted
On 12/28/18 7:09 PM, Jakub Kicinski wrote:
quoted
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+				      u32 cnt)
+{
+	struct bpf_subprog_info *need_first_linfo;
+	struct bpf_prog *prog = env->prog;
+	u32 i, l_off, l_cnt, nr_linfo;
+	struct bpf_line_info *linfo;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo || !cnt)
+		return 0;
+
+	linfo = prog->aux->linfo;
+
+	/* progs are already adjusted/removed, if program starts on off, it may
+	 * had its start cut off and its line info may need to be preserved
+	 */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	if (i < env->subprog_cnt && env->subprog_info[i].start == off)
+		need_first_linfo = &env->subprog_info[i];
+	else
+		need_first_linfo = NULL;
+
+	/* find first line info to remove */
+	for (i = 0; i < nr_linfo; i++)
+		if (linfo[i].insn_off >= off)
+			break;
+
+	/* count lines to be removed */
+	l_off = i;
+	l_cnt = 0;
+	for (; i < nr_linfo; i++)
+		if (linfo[i].insn_off < off + cnt)
+			l_cnt++;
+		else
+			break;
+
+	/* either we didn't actually cut the start or we can just use line info
+	 * of first instruction if it exists
+	 */
+	if (i < nr_linfo && linfo[i].insn_off == off + cnt)
+		need_first_linfo = NULL;
+	if (need_first_linfo) {
+		if (WARN_ONCE(!l_cnt,
+			      "verifier bug - no linfo removed, yet its missing"))
+			return -EINVAL;
+		if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
+			      need_first_linfo->linfo_idx >= l_off + l_cnt,
+			      "verifier bug - removed prog linfo not in removed range"))
+			return -EINVAL;
+		/* subprog linfo_idx is not adjusted yet, so just find out
+		 * which line it used to be and swap it
+		 */
+		memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
+			sizeof(*linfo));
+		need_first_linfo->linfo_idx = l_off;
+		linfo[l_off].insn_off = off;
+
+		l_off++;
+		l_cnt--;
+	}
In this routine, we handle need_first_linfo if the first insn of a
subprogram is deleted.
I wonder whether we need to handle the last deleted linfo as well. For
example:
     func1:
       line_info1:
          insn1
          insn2
     func2:
       line_info2:
          insn3
          insn4:

if insn2 and insn3 are deleted, the above algorithm will produce:
      func1:
        line_info1:
         insn1
      func2:
         insn4

func2 is missing the first line info, which should be line_info2 with
adjusted insn offset to the start of func2.
Mm.. should not happen, we should end up with:

      func1:
        line_info1:
          insn1
      func2:
        line_info2:
          insn4

func2 after func adjust will start at off and there is no line info for
off + cnt (insn4), so we will preserve line_info2.
Thanks for verification, I missed that
 >>> +	/* count lines to be removed */
 >>> +	l_off = i;
 >>> +	l_cnt = 0;
 >>> +	for (; i < nr_linfo; i++)
 >>> +		if (linfo[i].insn_off < off + cnt)
 >>> +			l_cnt++;
 >>> +		else
 >>> +			break;
once you reached linfo[i].insn_off >= off + cnt, no more counting.
since l_cnt starts from 0, so the last one is actually preserved.
I've added the following test_btf test, just to be sure:

{
	.descr = "line_info (dead end + subprog start w/ no linfo)",
	.raw_types = {
		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
		BTF_END_RAW,
	},
	BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"),
	.insns = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 2), /* dead */
		BPF_CALL_REL(2),
		BPF_EXIT_INSN(),
		BPF_EXIT_INSN(), /* dead */
		/* func */
		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* dead */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	},
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
	.func_info_cnt = 2,
	.func_info_rec_size = 8,
	.func_info = { {0, 3}, {5, 4}, },
	.line_info = {
		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
		BTF_END_RAW,
	},
	.line_info_rec_size = sizeof(struct bpf_line_info),
	.nr_jited_ksyms = 2,
},

~# bpftool prog dump xlated id 48
int main(int x):
; /* main linfo */
    0: (b7) r0 = 0
    1: (85) call pc+1#0xffffffffc041e782
    2: (95) exit
int func(int x):
; /* func linfo */
    3: (b7) r0 = 0
    4: (95) exit

~# bpftool prog dump jited id 48
int main(int x):
0xffffffffc041cb2f:
; /* main linfo */
    0:	push   %rbp
    1:	mov    %rsp,%rbp
    4:	sub    $0x28,%rsp
    b:	sub    $0x28,%rbp
    f:	mov    %rbx,0x0(%rbp)
   13:	mov    %r13,0x8(%rbp)
   17:	mov    %r14,0x10(%rbp)
   1b:	mov    %r15,0x18(%rbp)
   1f:	xor    %eax,%eax
   21:	mov    %rax,0x20(%rbp)
   25:	xor    %eax,%eax
   27:	callq  0x0000000000001c53
   2c:	mov    0x0(%rbp),%rbx
   30:	mov    0x8(%rbp),%r13
   34:	mov    0x10(%rbp),%r14
   38:	mov    0x18(%rbp),%r15
   3c:	add    $0x28,%rbp
   40:	leaveq
   41:	retq

int func(int x):
0xffffffffc041e782:
; /* func linfo */
    0:	push   %rbp
    1:	mov    %rsp,%rbp
    4:	sub    $0x28,%rsp
    b:	sub    $0x28,%rbp
    f:	mov    %rbx,0x0(%rbp)
   13:	mov    %r13,0x8(%rbp)
   17:	mov    %r14,0x10(%rbp)
   1b:	mov    %r15,0x18(%rbp)
   1f:	xor    %eax,%eax
   21:	mov    %rax,0x20(%rbp)
   25:	xor    %eax,%eax
   27:	mov    0x0(%rbp),%rbx
   2b:	mov    0x8(%rbp),%r13
   2f:	mov    0x10(%rbp),%r14
   33:	mov    0x18(%rbp),%r15
   37:	add    $0x28,%rbp
   3b:	leaveq
   3c:	retq
quoted
Another example:
     func1:
        line_info1:
           insn1
           insn2
        line_info2:
           insn3
           insn4
If insn2 and insn3 are deleted, we will get
      func1:
        line_info1:
          insn1
          insn4
This is not ideal either. We should attribute insn4 to line_info2.
Cool, I'm a little in the dark on what the definition of line info is,
so your suggestions are very much appreciated!  If you're saying that
all instructions below line info are considered part of that "line",
that'd simplify the code.

So should I always preserve "last" line info if first live instruction
doesn't have one?

linfo1:
	insn1
	insn2 /* dead */
linfo2:
	insn3 /* dead */
	insn4
linfo3:
	insn5

   ||
   \/

linfo1:
	insn1
linfo2:
	insn4
linfo3:
	insn5
Yes, the above is the desirable output.

That'd simplify things quite a bit!
quoted
quoted
+
+	/* remove the line info which refers to the removed instructions */
+	if (l_cnt) {
+		memmove(linfo + l_off, linfo + i,
+			sizeof(*linfo) * (nr_linfo - i));
+
+		prog->aux->nr_linfo -= l_cnt;
+		nr_linfo = prog->aux->nr_linfo;
+	}
+
+	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
+	for (i = l_off; i < nr_linfo; i++)
+		linfo[i].insn_off -= cnt;
+
+	/* fix up all subprogs (incl. 'exit') which start >= off */
+	for (i = 0; i <= env->subprog_cnt; i++)
+		if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+			env->subprog_info[i].linfo_idx -= l_cnt;
+
+	return 0;
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	unsigned int orig_prog_len = env->prog->len;
+	int err;
+
+	if (!cnt)
+		return 0;
This check is probably not needed as all call sites (including
patch #4) guarantees non-zero cnt.
Ack, thanks for the review, I was unsure if this is not convention.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help