Thread: 20 messages, 5 authors, 2023-10-06

Re: [PATCH net-next 1/1] net/sched: Disambiguate verdict from return code

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: 2023-09-19 23:21:13
Also in: bpf

On Tue, Sep 19, 2023 at 6:15 PM Daniel Borkmann [off-list ref] wrote:
[ +Martin, bpf ]

On 9/19/23 4:59 PM, Victor Nogueira wrote:
quoted
Currently there is no way to distinguish between an error and a
classification verdict. This patch adds the verdict field as a part of
struct tcf_result. That way, tcf_classify can return a proper
error number when it fails, and we keep the classification result
information encapsulated in struct tcf_result.

Also add values SKB_DROP_REASON_TC_EGRESS_ERROR and
SKB_DROP_REASON_TC_INGRESS_ERROR to enum skb_drop_reason.
With that we can distinguish between a drop from a processing error versus
a drop from classification.

Signed-off-by: Victor Nogueira <redacted>
---
  include/net/dropreason-core.h |  6 +++++
  include/net/sch_generic.h     |  7 ++++++
  net/core/dev.c                | 42 ++++++++++++++++++++++++++---------
  net/sched/cls_api.c           | 38 ++++++++++++++++++++-----------
  net/sched/sch_cake.c          | 32 +++++++++++++-------------
  net/sched/sch_drr.c           | 33 +++++++++++++--------------
  net/sched/sch_ets.c           |  6 +++--
  net/sched/sch_fq_codel.c      | 29 ++++++++++++------------
  net/sched/sch_fq_pie.c        | 28 +++++++++++------------
  net/sched/sch_hfsc.c          |  6 +++--
  net/sched/sch_htb.c           |  6 +++--
  net/sched/sch_multiq.c        |  6 +++--
  net/sched/sch_prio.c          |  7 ++++--
  net/sched/sch_qfq.c           | 34 +++++++++++++---------------
  net/sched/sch_sfb.c           | 29 ++++++++++++------------
  net/sched/sch_sfq.c           | 28 +++++++++++------------
  16 files changed, 195 insertions(+), 142 deletions(-)
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a587e83fc169..b1c069c8e7f2 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -80,6 +80,8 @@
      FN(IPV6_NDISC_BAD_OPTIONS)      \
      FN(IPV6_NDISC_NS_OTHERHOST)     \
      FN(QUEUE_PURGE)                 \
+     FN(TC_EGRESS_ERROR)             \
+     FN(TC_INGRESS_ERROR)            \
      FNe(MAX)

  /**
@@ -345,6 +347,10 @@ enum skb_drop_reason {
      SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
      /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
      SKB_DROP_REASON_QUEUE_PURGE,
+     /** @SKB_DROP_REASON_TC_EGRESS_ERROR: dropped in TC egress HOOK due to error */
+     SKB_DROP_REASON_TC_EGRESS_ERROR,
+     /** @SKB_DROP_REASON_TC_INGRESS_ERROR: dropped in TC ingress HOOK due to error */
+     SKB_DROP_REASON_TC_INGRESS_ERROR,
      /**
       * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
       * shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f232512505f8..9a3f71d2545e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -326,6 +326,7 @@ struct Qdisc_ops {


  struct tcf_result {
+     u32 verdict;
      union {
              struct {
                      unsigned long   class;
@@ -336,6 +337,12 @@ struct tcf_result {
      };
  };

+static inline void tcf_result_set_verdict(struct tcf_result *res,
+                                       const u32 verdict)
+{
+     res->verdict = verdict;
+}
+
  struct tcf_chain;

  struct tcf_proto_ops {
diff --git a/net/core/dev.c b/net/core/dev.c
index ccff2b6ef958..1450f4741d9b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3910,31 +3910,39 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
  #endif /* CONFIG_NET_EGRESS */

  #ifdef CONFIG_NET_XGRESS
-static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
+               struct tcf_result *res)
  {
-     int ret = TC_ACT_UNSPEC;
+     int ret = 0;
  #ifdef CONFIG_NET_CLS_ACT
      struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
-     struct tcf_result res;

-     if (!miniq)
+     if (!miniq) {
+             tcf_result_set_verdict(res, TC_ACT_UNSPEC);
              return ret;
+     }

      tc_skb_cb(skb)->mru = 0;
      tc_skb_cb(skb)->post_ct = false;

      mini_qdisc_bstats_cpu_update(miniq, skb);
-     ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+     ret = tcf_classify(skb, miniq->block, miniq->filter_list, res, false);
+     if (ret < 0) {
+             mini_qdisc_qstats_cpu_drop(miniq);
+             return ret;
+     }
      /* Only tcf related quirks below. */
-     switch (ret) {
+     switch (res->verdict) {
      case TC_ACT_SHOT:
              mini_qdisc_qstats_cpu_drop(miniq);
              break;
      case TC_ACT_OK:
      case TC_ACT_RECLASSIFY:
-             skb->tc_index = TC_H_MIN(res.classid);
+             skb->tc_index = TC_H_MIN(res->classid);
              break;
      }
+#else
+     tcf_result_set_verdict(res, TC_ACT_UNSPEC);
  #endif /* CONFIG_NET_CLS_ACT */
      return ret;
  }
@@ -3977,6 +3985,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
                 struct net_device *orig_dev, bool *another)
  {
      struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+     struct tcf_result res = {0};
      int sch_ret;

      if (!entry)
@@ -3994,9 +4003,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
              if (sch_ret != TC_ACT_UNSPEC)
                      goto ingress_verdict;
      }
-     sch_ret = tc_run(tcx_entry(entry), skb);
+     sch_ret = tc_run(tcx_entry(entry), skb, &res);
+     if (sch_ret < 0) {
+             kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS_ERROR);
+             *ret = NET_RX_DROP;
+             return NULL;
+     }
  ingress_verdict:
-     switch (sch_ret) {
+     switch (res.verdict) {
This breaks tcx, please move all this logic into tc_run(). No changes to sch_handle_ingress()
or sch_handle_egress() should be necessary; you can then just remap the return code to TC_ACT_SHOT
in such a case.
I think it is valuable to have a good reason code like
SKB_DROP_REASON_TC_XXX_ERROR to disambiguate errors from
verdicts in the case of the tc_run() variant.
For tcx_run(), does this look OK (for consistency)?

if (static_branch_unlikely(&tcx_needed_key)) {
                sch_ret = tcx_run(entry, skb, true);
                if (sch_ret != TC_ACT_UNSPEC) {
                        res.verdict = sch_ret;
                        goto ingress_verdict;
                }
}

cheers,
jamal
quoted
      case TC_ACT_REDIRECT:
              /* skb_mac_header check was done by BPF, so we can safely
               * push the L2 header back before redirecting to another
@@ -4032,6 +4046,7 @@ static __always_inline struct sk_buff *
  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
  {
      struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+     struct tcf_result res = {0};
      int sch_ret;

      if (!entry)
@@ -4045,9 +4060,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
              if (sch_ret != TC_ACT_UNSPEC)
                      goto egress_verdict;
      }
-     sch_ret = tc_run(tcx_entry(entry), skb);
+     sch_ret = tc_run(tcx_entry(entry), skb, &res);
+     if (sch_ret < 0) {
+             kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS_ERROR);
+             *ret = NET_XMIT_DROP;
+             return NULL;
+     }
  egress_verdict:
-     switch (sch_ret) {
+     switch (res.verdict) {
      case TC_ACT_REDIRECT:
              /* No need to push/pop skb's mac_header here on egress! */
              skb_do_redirect(skb);
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help