Re: [PATCH net-next 1/1] net/sched: Disambiguate verdict from return code
From: Daniel Borkmann <daniel@iogearbox.net>
Date: 2023-09-19 22:15:36
Also in:
bpf
[ +Martin, bpf ] On 9/19/23 4:59 PM, Victor Nogueira wrote:
quoted hunk ↗ jump to hunk
Currently there is no way to distinguish between an error and a classification verdict. This patch adds the verdict field as a part of struct tcf_result. That way, tcf_classify can return a proper error number when it fails, and we keep the classification result information encapsulated in struct tcf_result. Also add values SKB_DROP_REASON_TC_EGRESS_ERROR and SKB_DROP_REASON_TC_INGRESS_ERROR to enum skb_drop_reason. With that we can distinguish between a drop from a processing error versus a drop from classification. Signed-off-by: Victor Nogueira <redacted> --- include/net/dropreason-core.h | 6 +++++ include/net/sch_generic.h | 7 ++++++ net/core/dev.c | 42 ++++++++++++++++++++++++++--------- net/sched/cls_api.c | 38 ++++++++++++++++++++----------- net/sched/sch_cake.c | 32 +++++++++++++------------- net/sched/sch_drr.c | 33 +++++++++++++-------------- net/sched/sch_ets.c | 6 +++-- net/sched/sch_fq_codel.c | 29 ++++++++++++------------ net/sched/sch_fq_pie.c | 28 +++++++++++------------ net/sched/sch_hfsc.c | 6 +++-- net/sched/sch_htb.c | 6 +++-- net/sched/sch_multiq.c | 6 +++-- net/sched/sch_prio.c | 7 ++++-- net/sched/sch_qfq.c | 34 +++++++++++++--------------- net/sched/sch_sfb.c | 29 ++++++++++++------------ net/sched/sch_sfq.c | 28 +++++++++++------------ 16 files changed, 195 insertions(+), 142 deletions(-)diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a587e83fc169..b1c069c8e7f2 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h@@ -80,6 +80,8 @@ FN(IPV6_NDISC_BAD_OPTIONS) \ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ + FN(TC_EGRESS_ERROR) \ + FN(TC_INGRESS_ERROR) \ FNe(MAX) /**@@ -345,6 +347,10 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ SKB_DROP_REASON_QUEUE_PURGE, + /** @SKB_DROP_REASON_TC_EGRESS_ERROR: dropped in TC egress HOOK due to error */ + SKB_DROP_REASON_TC_EGRESS_ERROR, + /** @SKB_DROP_REASON_TC_INGRESS_ERROR: dropped in TC ingress HOOK due to error */ + SKB_DROP_REASON_TC_INGRESS_ERROR, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gendiff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f232512505f8..9a3f71d2545e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h@@ -326,6 +326,7 @@ struct Qdisc_ops { struct tcf_result { + u32 verdict; union { struct { unsigned long class;@@ -336,6 +337,12 @@ struct tcf_result { }; }; +static inline void tcf_result_set_verdict(struct tcf_result *res, + const u32 verdict) +{ + res->verdict = verdict; +} + struct tcf_chain; struct tcf_proto_ops {diff --git a/net/core/dev.c b/net/core/dev.c index ccff2b6ef958..1450f4741d9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c@@ -3910,31 +3910,39 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, + struct tcf_result *res) { - int ret = TC_ACT_UNSPEC; + int ret = 0; #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq); - struct tcf_result res; - if (!miniq) + if (!miniq) { + tcf_result_set_verdict(res, TC_ACT_UNSPEC); return ret; + } tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); - ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); + ret = tcf_classify(skb, miniq->block, miniq->filter_list, res, false); + if (ret < 0) { + mini_qdisc_qstats_cpu_drop(miniq); + return ret; + } /* Only tcf related quirks below. */ - switch (ret) { + switch (res->verdict) { case TC_ACT_SHOT: mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: case TC_ACT_RECLASSIFY: - skb->tc_index = TC_H_MIN(res.classid); + skb->tc_index = TC_H_MIN(res->classid); break; } +#else + tcf_result_set_verdict(res, TC_ACT_UNSPEC); #endif /* CONFIG_NET_CLS_ACT */ return ret; }@@ -3977,6 +3985,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); + struct tcf_result res = {0}; int sch_ret; if (!entry)@@ -3994,9 +4003,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, if (sch_ret != TC_ACT_UNSPEC) goto ingress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &res); + if (sch_ret < 0) { + kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS_ERROR); + *ret = NET_RX_DROP; + return NULL; + } ingress_verdict: - switch (sch_ret) { + switch (res.verdict) {
This breaks tcx, please move all this logic into tc_run(). No changes to sch_handle_ingress() or sch_handle_egress should be necessary, you can then just remap the return code to TC_ACT_SHOT in such case.
quoted hunk ↗ jump to hunk
case TC_ACT_REDIRECT: /* skb_mac_header check was done by BPF, so we can safely * push the L2 header back before redirecting to another@@ -4032,6 +4046,7 @@ static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); + struct tcf_result res = {0}; int sch_ret; if (!entry)@@ -4045,9 +4060,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (sch_ret != TC_ACT_UNSPEC) goto egress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &res); + if (sch_ret < 0) { + kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS_ERROR); + *ret = NET_XMIT_DROP; + return NULL; + } egress_verdict: - switch (sch_ret) { + switch (res.verdict) { case TC_ACT_REDIRECT: /* No need to push/pop skb's mac_header here on egress! */ skb_do_redirect(skb);