Thread (27 messages) 27 messages, 6 authors, 2021-02-26

Re: [PATCH net] net: fix race between napi kthread mode and busy poll

From: Jakub Kicinski <kuba@kernel.org>
Date: 2021-02-26 21:36:13

Possibly related (same subject, not in this thread)

On Fri, 26 Feb 2021 10:28:00 -0800 Wei Wang wrote:
Hi Martin,
Could you help try the following new patch on your setup and let me
know if there are still issues?
FWIW your email got line wrapped for me.
quoted hunk ↗ jump to hunk
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddf4cfc12615..9ed0f89ccdd5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -357,9 +357,10 @@ enum {
        NAPI_STATE_NPSVC,               /* Netpoll - don't dequeue
from poll_list */
        NAPI_STATE_LISTED,              /* NAPI added to system lists */
        NAPI_STATE_NO_BUSY_POLL,        /* Do not add in napi_hash, no
busy polling */
-       NAPI_STATE_IN_BUSY_POLL,        /* sk_busy_loop() owns this NAPI */
+       NAPI_STATE_IN_BUSY_POLL,        /* sk_busy_loop() grabs SHED
nit: SHED -> SCHED
bit and could busy poll */
        NAPI_STATE_PREFER_BUSY_POLL,    /* prefer busy-polling over
softirq processing*/
        NAPI_STATE_THREADED,            /* The poll is performed
inside its own thread*/
+       NAPI_STATE_SCHED_BUSY_POLL,     /* Napi is currently scheduled
in busy poll mode */
nit: Napi -> NAPI ?
quoted hunk ↗ jump to hunk
 };

 enum {
@@ -372,6 +373,7 @@ enum {
        NAPIF_STATE_IN_BUSY_POLL        = BIT(NAPI_STATE_IN_BUSY_POLL),
        NAPIF_STATE_PREFER_BUSY_POLL    = BIT(NAPI_STATE_PREFER_BUSY_POLL),
        NAPIF_STATE_THREADED            = BIT(NAPI_STATE_THREADED),
+       NAPIF_STATE_SCHED_BUSY_POLL     = BIT(NAPI_STATE_SCHED_BUSY_POLL),
 };

 enum gro_result {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c5967e80132..c717b67ce137 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1501,15 +1501,14 @@ static int napi_kthread_create(struct napi_struct *n)
 {
        int err = 0;

-       /* Create and wake up the kthread once to put it in
-        * TASK_INTERRUPTIBLE mode to avoid the blocked task
-        * warning and work with loadavg.
+       /* Avoid using  kthread_run() here to prevent race
+        * between softirq and kthread polling.
         */
-       n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
-                               n->dev->name, n->napi_id);
+       n->thread = kthread_create(napi_threaded_poll, n, "napi/%s-%d",
+                                  n->dev->name, n->napi_id);
I'm not sure this takes care of rapid:

dev_set_threaded(0)
 # NAPI gets sent to sirq
dev_set_threaded(1)

since subsequent set_threaded(1) doesn't spawn the thread "afresh".
quoted hunk ↗ jump to hunk
        if (IS_ERR(n->thread)) {
                err = PTR_ERR(n->thread);
-               pr_err("kthread_run failed with err %d\n", err);
+               pr_err("kthread_create failed with err %d\n", err);
                n->thread = NULL;
        }
@@ -6486,6 +6485,7 @@ bool napi_complete_done(struct napi_struct *n,
int work_done)
                WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));

                new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+                             NAPIF_STATE_SCHED_BUSY_POLL |
                              NAPIF_STATE_PREFER_BUSY_POLL);

                /* If STATE_MISSED was set, leave STATE_SCHED set,
@@ -6525,6 +6525,7 @@ static struct napi_struct *napi_by_id(unsigned
int napi_id)

 static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 {
+       clear_bit(NAPI_STATE_SCHED_BUSY_POLL, &napi->state);
        if (!skip_schedule) {
                gro_normal_list(napi);
                __napi_schedule(napi);
@@ -6624,7 +6625,8 @@ void napi_busy_loop(unsigned int napi_id,
                        }
                        if (cmpxchg(&napi->state, val,
                                    val | NAPIF_STATE_IN_BUSY_POLL |
-                                         NAPIF_STATE_SCHED) != val) {
+                                         NAPIF_STATE_SCHED |
+                                         NAPIF_STATE_SCHED_BUSY_POLL) != val) {
                                if (prefer_busy_poll)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
                                goto count;
@@ -6971,7 +6973,10 @@ static int napi_thread_wait(struct napi_struct *napi)
        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop() && !napi_disable_pending(napi)) {
-               if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+               unsigned long val = READ_ONCE(napi->state);
+
+               if (val & NAPIF_STATE_SCHED &&
+                   !(val & NAPIF_STATE_SCHED_BUSY_POLL)) {
Again, not protected from the napi_disable() case AFAICT.
                        WARN_ON(!list_empty(&napi->poll_list));
                        __set_current_state(TASK_RUNNING);
                        return 0;
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help