Thread (3 messages) 3 messages, 2 authors, 7d ago

Re: [PATCH net 1/1] net: sched: ets: avoid deficit wrap and empty-round livelock

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: 2026-06-15 12:17:49
Subsystem: networking [general], tc subsystem, the rest · Maintainers: "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim, Jiri Pirko, Linus Torvalds

On Mon, Jun 15, 2026 at 8:09 AM Jamal Hadi Salim [off-list ref] wrote:
On Mon, Jun 15, 2026 at 6:38 AM Ren Wei [off-list ref] wrote:
quoted
From: Wyatt Feng <redacted>

ETS keeps each DRR-style deficit in a u32 and replenishes it with
the configured quantum whenever the head packet is too large. Both
the quantum and qdisc_pkt_len() are user-controlled inputs: a large
quantum can wrap the deficit counter, while a tiny quantum combined
with an inflated qdisc_pkt_len() can force billions of iterations in
softirq context before any packet becomes eligible.

Store deficits in u64 so replenishment remains monotonic, and after
one complete pass over the active list compute how many additional
full rounds cannot dequeue from any class. Add that budget in bulk
instead of advancing one quantum at a time. This preserves ETS ordering
while removing the non-productive loop.

Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc")
Cc: stable@vger.kernel.org
Reported-by: Yuan Tan <redacted>
Reported-by: Yifan Wu <redacted>
Reported-by: Juefei Pu <redacted>
Reported-by: Zhengchuan Liang <redacted>
Reported-by: Xin Liu <redacted>
Assisted-by: Codex:GPT-5.4
Signed-off-by: Wyatt Feng <redacted>
Signed-off-by: Ren Wei <redacted>
---
 net/sched/sch_ets.c | 63 +++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 19 deletions(-)
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index a4b07b661b77..ed3b191781ee 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -40,7 +40,7 @@ struct ets_class {
        struct list_head alist; /* In struct ets_sched.active. */
        struct Qdisc *qdisc;
        u32 quantum;
-       u32 deficit;
+       u64 deficit;
        struct gnet_stats_basic_sync bstats;
        struct gnet_stats_queue qstats;
 };
@@ -465,8 +465,10 @@ ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
 static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
 {
        struct ets_sched *q = qdisc_priv(sch);
-       struct ets_class *cl;
+       struct ets_class *cl, *first;
        struct sk_buff *skb;
+       u64 extra_rounds;
+       u64 rounds;
        unsigned int band;
        unsigned int len;
@@ -481,26 +483,49 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
                if (list_empty(&q->active))
                        goto out;

-               cl = list_first_entry(&q->active, struct ets_class, alist);
-               skb = cl->qdisc->ops->peek(cl->qdisc);
-               if (!skb) {
-                       qdisc_warn_nonwc(__func__, cl->qdisc);
-                       goto out;
-               }
+               first = list_first_entry(&q->active, struct ets_class, alist);
+               extra_rounds = U64_MAX;

-               len = qdisc_pkt_len(skb);
-               if (len <= cl->deficit) {
-                       cl->deficit -= len;
-                       skb = qdisc_dequeue_peeked(cl->qdisc);
-                       if (unlikely(!skb))
+               do {
+                       cl = list_first_entry(&q->active, struct ets_class, alist);
+                       skb = cl->qdisc->ops->peek(cl->qdisc);
+                       if (!skb) {
+                               qdisc_warn_nonwc(__func__, cl->qdisc);
                                goto out;
-                       if (cl->qdisc->q.qlen == 0)
-                               list_del_init(&cl->alist);
-                       return ets_qdisc_dequeue_skb(sch, skb);
-               }
+                       }
+
+                       len = qdisc_pkt_len(skb);
+                       if (len <= cl->deficit) {
+                               cl->deficit -= len;
+                               skb = qdisc_dequeue_peeked(cl->qdisc);
+                               if (unlikely(!skb))
+                                       goto out;
+                               if (cl->qdisc->q.qlen == 0)
+                                       list_del_init(&cl->alist);
+                               return ets_qdisc_dequeue_skb(sch, skb);
+                       }
+
+                       cl->deficit += cl->quantum;
+                       list_move_tail(&cl->alist, &q->active);
+
+                       if (len <= cl->deficit) {
+                               extra_rounds = 0;
+                               continue;
+                       }
+
+                       rounds = div64_u64((u64)len - cl->deficit + cl->quantum - 1,
+                                          cl->quantum);
+                       if (rounds < extra_rounds)
+                               extra_rounds = rounds;
+               } while (list_first_entry(&q->active, struct ets_class,
+                                         alist) != first);
+
+               if (!extra_rounds)
+                       continue;

-               cl->deficit += cl->quantum;
-               list_move_tail(&cl->alist, &q->active);
+               /* Skip full rounds where no active class can dequeue. */
+               list_for_each_entry(cl, &q->active, alist)
+                       cl->deficit += extra_rounds * cl->quantum;
        }
 out:
        return NULL;
Hrm. This appears like overkill. Doing a div() in the fast path is questionable.
How about we bound the loop per dequeue to have an upper bound based on nbands?
Roughly (untested/uncompiled) like the following caffeine inspired approach:

------------
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index a4b07b661b77..127f948b3bd6 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -465,6 +465,8 @@ ets_qdisc_dequeue_skb(struct Qdisc *sch, struct
sk_buff *skb)
 static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
 {
        struct ets_sched *q = qdisc_priv(sch);
+       unsigned int max_l = q->nbands * 2;
+       unsigned int cnt_l = 0;
        struct ets_class *cl;
        struct sk_buff *skb;
        unsigned int band;
@@ -501,6 +503,8 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)

                cl->deficit += cl->quantum;
                list_move_tail(&cl->alist, &q->active);
+               if (++cnt_l > max_l)
+                       goto out;
        }
 out:
        return NULL;
------------------
cheers,
jamal
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help