Thread (11 messages) 11 messages, 3 authors, 2017-04-08

Re: [PATCH v2 2/5] perf/x86/intel: Record branch type

From: Peter Zijlstra <peterz@infradead.org>
Date: 2017-04-07 15:20:45
Also in: lkml

On Fri, Apr 07, 2017 at 06:47:43PM +0800, Jin Yao wrote:
Perf already has support for disassembling the branch instruction
and using the branch type for filtering. The patch just records
the branch type in perf_branch_entry.

Before recording, the patch converts the x86 branch classification
to common branch classification and compute for checking if the
branches cross 4K or 2MB areas. It's an approximate computing for
crossing 4K page or 2MB page.
The changelog is completely empty of rationale. Why do we care?

Not having the binary is a very bad reason; you can't do much of
anything if that's missing.

quoted hunk ↗ jump to hunk
@@ -923,6 +933,84 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 	return ret;
 }
 
+static int
+common_branch_type(int type, u64 from, u64 to)
+{
+	int ret;
+
+	type = type & (~(X86_BR_KERNEL | X86_BR_USER));
+
+	switch (type) {
+	case X86_BR_CALL:
+	case X86_BR_ZERO_CALL:
+		ret = PERF_BR_CALL;
+		break;
+
+	case X86_BR_RET:
+		ret = PERF_BR_RET;
+		break;
+
+	case X86_BR_SYSCALL:
+		ret = PERF_BR_SYSCALL;
+		break;
+
+	case X86_BR_SYSRET:
+		ret = PERF_BR_SYSRET;
+		break;
+
+	case X86_BR_INT:
+		ret = PERF_BR_INT;
+		break;
+
+	case X86_BR_IRET:
+		ret = PERF_BR_IRET;
+		break;
+
+	case X86_BR_IRQ:
+		ret = PERF_BR_IRQ;
+		break;
+
+	case X86_BR_ABORT:
+		ret = PERF_BR_FAR_BRANCH;
+		break;
+
+	case X86_BR_JCC:
+		if (to > from)
+			ret = PERF_BR_JCC_FWD;
+		else
+			ret = PERF_BR_JCC_BWD;
+		break;
This seems like superfluous information; we already get to and from, so
this comparison is pointless.

The rest looks like something you can simpler implement using a lookup
table.
quoted hunk ↗ jump to hunk
+
+	case X86_BR_JMP:
+		ret = PERF_BR_JMP;
+		break;
+
+	case X86_BR_IND_CALL:
+		ret = PERF_BR_IND_CALL;
+		break;
+
+	case X86_BR_IND_JMP:
+		ret = PERF_BR_IND_JMP;
+		break;
+
+	default:
+		ret = PERF_BR_NONE;
+	}
+
+	return ret;
+}
+
+static bool
+cross_area(u64 addr1, u64 addr2, int size)
+{
+	u64 align1, align2;
+
+	align1 = addr1 & ~(size - 1);
+	align2 = addr2 & ~(size - 1);
+
+	return (align1 != align2) ? true : false;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -939,7 +1027,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	bool compress = false;
 
 	/* if sampling all branches, then nothing to filter */
-	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
 		return;
 
 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -960,6 +1049,21 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 			cpuc->lbr_entries[i].from = 0;
 			compress = true;
 		}
+
+		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) {
+			cpuc->lbr_entries[i].type = common_branch_type(type,
+								       from,
+								       to);
+			if (cross_area(from, to, AREA_2M))
+				cpuc->lbr_entries[i].cross = PERF_BR_CROSS_2M;
+			else if (cross_area(from, to, AREA_4K))
+				cpuc->lbr_entries[i].cross = PERF_BR_CROSS_4K;
+			else
+				cpuc->lbr_entries[i].cross = PERF_BR_CROSS_NONE;
This again is superfluous information; it is already fully contained in
to and from, which we have.
+		} else {
+			cpuc->lbr_entries[i].type = PERF_BR_NONE;
+			cpuc->lbr_entries[i].cross = PERF_BR_CROSS_NONE;
+		}
 	}
 
 	if (!compress)
-- 
2.7.4
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help