Thread (10 messages) 10 messages, 2 authors, 2021-10-29

Re: [PATCH v2 1/4] tracing/histogram: Optimize division by constants

From: Kalesh Singh <hidden>
Date: 2021-10-29 18:53:30
Also in: linux-kselftest, lkml

On Fri, Oct 29, 2021 at 11:45 AM Steven Rostedt [off-list ref] wrote:
On Fri, 29 Oct 2021 11:33:27 -0700
Kalesh Singh [off-list ref] wrote:
quoted
If the divisor is a constant use specific division functions to
avoid extra branches when the trigger is hit.

If the divisor constant but not a power of 2, the division can be
replaced with a multiplication and shift in the following case:

Let X = dividend and Y = divisor.

Choose Z = some power of 2. If Y <= Z, then:
    X / Y = (X * (Z / Y)) / Z

(Z / Y) is a constant (mult) which is calculated at parse time, so:
    X / Y = (X * mult) / Z

The division by Z can be replaced by a shift since Z is a power of 2:
    X / Y = (X * mult) >> shift

As long, as X < Z the results will not be off by more than 1.

Signed-off-by: Kalesh Singh <redacted>
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
---

Changes in v2:
  - Return -EDOM if divisor is a constant and zero, per Steve

 kernel/trace/trace_events_hist.c | 117 ++++++++++++++++++++++++++++++-
 1 file changed, 116 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 364cb3091789..1084aa41f047 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -68,7 +68,8 @@
      C(INVALID_SORT_FIELD,   "Sort field must be a key or a val"),   \
      C(INVALID_STR_OPERAND,  "String type can not be an operand in expression"), \
      C(EXPECT_NUMBER,        "Expecting numeric literal"),           \
-     C(UNARY_MINUS_SUBEXPR,  "Unary minus not supported in sub-expressions"),
+     C(UNARY_MINUS_SUBEXPR,  "Unary minus not supported in sub-expressions"), \
+     C(DIVISION_BY_ZERO,     "Division by zero"),

 #undef C
 #define C(a, b)              HIST_ERR_##a
@@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
 #define HIST_FIELDS_MAX              (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
 #define HIST_ACTIONS_MAX     8
 #define HIST_CONST_DIGITS_MAX        21
+#define HIST_DIV_SHIFT               20  /* For optimizing division by constants */

 enum field_op_id {
      FIELD_OP_NONE,
@@ -160,6 +162,8 @@ struct hist_field {

      /* Numeric literals are represented as u64 */
      u64                             constant;
+     /* Used to optimize division by constants */
+     u64                             div_multiplier;
 };

 static u64 hist_field_none(struct hist_field *field,
@@ -311,6 +315,72 @@ static u64 hist_field_div(struct hist_field *hist_field,
      return div64_u64(val1, val2);
 }

+static u64 div_by_power_of_two(struct hist_field *hist_field,
+                             struct tracing_map_elt *elt,
+                             struct trace_buffer *buffer,
+                             struct ring_buffer_event *rbe,
+                             void *event)
+{
+     struct hist_field *operand1 = hist_field->operands[0];
+     struct hist_field *operand2 = hist_field->operands[1];
+
+     u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+     u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
If these functions are only called when val2 is constant, can't we make it
such that we get val2 from the hist_field directly? That is:

        u64 val2 = operand2->constant;
operand2 might be a var ref to a constant, so we would need to resolve
that with hist_field_var_ref().

-Kalesh
That would save us a function call, and an indirect on at that (that gets
slowed down by spectre).

Same for the ones below.

-- Steve

quoted
+
+     return val1 >> __ffs64(val2);
+}
+
+static u64 div_by_not_power_of_two(struct hist_field *hist_field,
+                             struct tracing_map_elt *elt,
+                             struct trace_buffer *buffer,
+                             struct ring_buffer_event *rbe,
+                             void *event)
+{
+     struct hist_field *operand1 = hist_field->operands[0];
+     struct hist_field *operand2 = hist_field->operands[1];
+
+     u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+     u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
+
+     return div64_u64(val1, val2);
+}
+
+static u64 div_by_mult_and_shift(struct hist_field *hist_field,
+                             struct tracing_map_elt *elt,
+                             struct trace_buffer *buffer,
+                             struct ring_buffer_event *rbe,
+                             void *event)
+{
+     struct hist_field *operand1 = hist_field->operands[0];
+     struct hist_field *operand2 = hist_field->operands[1];
+
+     u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+     /*
+      * If the divisor is a constant, do a multiplication and shift instead.
+      *
+      * Choose Z = some power of 2. If Y <= Z, then:
+      *     X / Y = (X * (Z / Y)) / Z
+      *
+      * (Z / Y) is a constant (mult) which is calculated at parse time, so:
+      *     X / Y = (X * mult) / Z
+      *
+      * The division by Z can be replaced by a shift since Z is a power of 2:
+      *     X / Y = (X * mult) >> HIST_DIV_SHIFT
+      *
+      * As long, as X < Z the results will not be off by more than 1.
+      */
+     if (val1 < (1 << HIST_DIV_SHIFT)) {
+             u64 mult = operand2->div_multiplier;
+
+             return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
+     } else {
+             u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
+
+             return div64_u64(val1, val2);
+     }
+}
+
 static u64 hist_field_mult(struct hist_field *hist_field,
                         struct tracing_map_elt *elt,
                         struct trace_buffer *buffer,
@@ -573,6 +643,37 @@ struct snapshot_context {
      void                    *key;
 };

+
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+                                      const char *var_name);
+
+/*
+ * Returns the specific division function to use if the divisor
+ * is constant. This avoids extra branches when the trigger is hit.
+ */
+static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
+{
+     u64 div;
+
+     if (divisor->flags & HIST_FIELD_FL_VAR_REF) {
+             struct hist_field *var;
+
+             var = find_var_field(divisor->var.hist_data, divisor->name);
+             div = var->constant;
+     } else
+             div = divisor->constant;
+
+     if (!(div & (div - 1)))
+             return div_by_power_of_two;
+
+     /* If the divisor is too large, do a regular division */
+     if (div > (1 << HIST_DIV_SHIFT))
+             return div_by_not_power_of_two;
+
+     divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
+     return div_by_mult_and_shift;
+}
+
 static void track_data_free(struct track_data *track_data)
 {
      struct hist_elt_data *elt_data;
@@ -2575,6 +2676,20 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
      expr->operands[0] = operand1;
      expr->operands[1] = operand2;

+
+     if (field_op == FIELD_OP_DIV &&
+                     operand2_flags & HIST_FIELD_FL_CONST) {
+             u64 divisor = (var2) ? var2->constant : operand2->constant;
+
+             if (!divisor) {
+                     hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
+                     ret = -EDOM;
+                     goto free;
+             }
+
+             op_fn = hist_field_get_div_fn(operand2);
+     }
+
      if (combine_consts) {
              if (var1)
                      expr->operands[0] = var1;
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help