Thread (25 messages) 25 messages, 2 authors, 17d ago

Re: [PATCH net-next v2 11/15] gve: split up notify block allocation and setup paths

From: Joshua Washington <joshwash@google.com>
Date: 2026-06-07 22:29:50
Also in: bpf, lkml

On Tue, Jun 2, 2026 at 4:59 PM Harshitha Ramamurthy
[off-list ref] wrote:
quoted hunk ↗ jump to hunk
From: Joshua Washington <joshwash@google.com>

Before this patch, notify block allocation and setup occurred in the same
method. This all occurred before gve_adminq_configure_device_resources,
which populates the irq_db_indicies array, a DMA region with BAR offsets
for MSI-X vectors.

The coming mailbox mode will require notify blocks to be set up only
after receiving the IRQ doorbell offsets, as the request does not work
with a supplied DMA buffer in the way that admin queue mode does. The
intended flow in that case would be:

1) allocate notify blocks
2) request doorbell information
3) set up MSI-X vectors based on doorbell info

This ordering also works for admin queue mode, so it will be updated to
match.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jordan Rhee <redacted>
Signed-off-by: Joshua Washington <joshwash@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
 drivers/net/ethernet/google/gve/gve.h      |   2 +
 drivers/net/ethernet/google/gve/gve_main.c | 158 ++++++++++++---------
 2 files changed, 89 insertions(+), 71 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 939b245ed562..e173c14bdc08 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -673,6 +673,7 @@ struct gve_notify_block {
        struct gve_tx_ring *tx; /* tx rings on this block */
        struct gve_rx_ring *rx; /* rx rings on this block */
        u32 irq;
+       bool irq_requested;
 };

 /* Tracks allowed and current rx queue settings */
@@ -953,6 +954,7 @@ struct gve_priv {
        u64 link_speed;
        bool up_before_suspend; /* True if dev was up before suspend */

+       bool mgmt_irq_requested;
        struct gve_ptype_lut *ptype_lut_dqo;

        /* Must be a power of two. */
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 06df6d8ad429..98970508ae54 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -425,6 +425,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
        return work_done;
 }

+static void gve_free_notify_blocks(struct gve_priv *priv)
+{
+       pci_disable_msix(priv->pdev);
+       if (priv->irq_db_indices) {
+               dma_free_coherent(&priv->pdev->dev,
+                                 priv->num_ntfy_blks *
+                                 sizeof(*priv->irq_db_indices),
+                                 priv->irq_db_indices,
+                                 priv->irq_db_indices_bus);
+               priv->irq_db_indices = NULL;
+       }
+
+       kvfree(priv->ntfy_blocks);
+       priv->ntfy_blocks = NULL;
+       kvfree(priv->msix_vectors);
+       priv->msix_vectors = NULL;
+}
+
 static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
 {
        if (priv->numa_node == NUMA_NO_NODE)
@@ -436,11 +454,9 @@ static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
 static int gve_alloc_notify_blocks(struct gve_priv *priv)
 {
        int num_vecs_requested = priv->num_ntfy_blks + 1;
-       const struct cpumask *node_mask;
-       unsigned int cur_cpu;
        int vecs_enabled;
-       int i, j;
        int err;
+       int i;

        priv->msix_vectors = kvzalloc_objs(*priv->msix_vectors,
                                           num_vecs_requested);
@@ -454,7 +470,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
                        GVE_MIN_MSIX, vecs_enabled);
                err = vecs_enabled;
-               goto abort_with_msix_vectors;
+               goto abort;
        }
        if (vecs_enabled != num_vecs_requested) {
                int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
@@ -477,15 +493,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
        }

-       /* Setup Management Vector  - the last vector */
-       snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
-                pci_name(priv->pdev));
-       err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
-                         gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
-       if (err) {
-               dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
-               goto abort_with_msix_enabled;
-       }
        priv->irq_db_indices =
                dma_alloc_coherent(&priv->pdev->dev,
                                   priv->num_ntfy_blks *
@@ -493,15 +500,65 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                                   &priv->irq_db_indices_bus, GFP_KERNEL);
        if (!priv->irq_db_indices) {
                err = -ENOMEM;
-               goto abort_with_mgmt_vector;
+               goto abort;
        }

        priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
                                     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
        if (!priv->ntfy_blocks) {
                err = -ENOMEM;
-               goto abort_with_irq_db_indices;
+               goto abort;
+       }
+       return 0;
+
+abort:
+       gve_free_notify_blocks(priv);
+       return err;
+}
+
+static void gve_teardown_notify_blocks(struct gve_priv *priv)
+{
+       int i;
+
+       if (!priv->ntfy_blocks)
+               return;
+
+       for (i = 0; i < priv->num_ntfy_blks; i++) {
+               struct gve_notify_block *block = &priv->ntfy_blocks[i];
+
+               if (!block->irq_requested)
+                       continue;
+
+               irq_set_affinity_hint(priv->msix_vectors[i].vector,
+                                     NULL);
+               free_irq(priv->msix_vectors[i].vector, block);
+               block->irq = 0;
+               block->irq_requested = false;
+       }
+
+       if (priv->mgmt_irq_requested) {
+               free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+               priv->mgmt_irq_requested = false;
+       }
+}
+
+static int gve_setup_notify_blocks(struct gve_priv *priv)
+{
+       const struct cpumask *node_mask;
+       unsigned int cur_cpu;
+       int i;
+       int err;
+
+       /* Setup Management Vector  - the last vector */
+       snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name),
+                "gve-mgmnt@pci:%s", pci_name(priv->pdev));
+       err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
+                         gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
+       if (err) {
+               dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
+               return err;
        }
+       priv->mgmt_irq_requested = true;

        /* Setup the other blocks - the first n-1 vectors */
        node_mask = gve_get_node_mask(priv);
@@ -519,9 +576,10 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                if (err) {
                        dev_err(&priv->pdev->dev,
                                "Failed to receive msix vector %d\n", i);
-                       goto abort_with_some_ntfy_blocks;
+                       goto abort;
                }
                block->irq = priv->msix_vectors[msix_idx].vector;
+               block->irq_requested = true;
                irq_set_affinity_and_hint(block->irq,
                                          cpumask_of(cur_cpu));
                block->irq_db_index = &priv->irq_db_indices[i].index;
@@ -535,61 +593,12 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                        cur_cpu = cpumask_first(node_mask);
        }
        return 0;
-abort_with_some_ntfy_blocks:
-       for (j = 0; j < i; j++) {
-               struct gve_notify_block *block = &priv->ntfy_blocks[j];
-               int msix_idx = j;

-               irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
-                                     NULL);
-               free_irq(priv->msix_vectors[msix_idx].vector, block);
-               block->irq = 0;
-       }
-       kvfree(priv->ntfy_blocks);
-       priv->ntfy_blocks = NULL;
-abort_with_irq_db_indices:
-       dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
-                         sizeof(*priv->irq_db_indices),
-                         priv->irq_db_indices, priv->irq_db_indices_bus);
-       priv->irq_db_indices = NULL;
-abort_with_mgmt_vector:
-       free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
-abort_with_msix_enabled:
-       pci_disable_msix(priv->pdev);
-abort_with_msix_vectors:
-       kvfree(priv->msix_vectors);
-       priv->msix_vectors = NULL;
+abort:
+       gve_teardown_notify_blocks(priv);
        return err;
 }

-static void gve_free_notify_blocks(struct gve_priv *priv)
-{
-       int i;
-
-       if (!priv->msix_vectors)
-               return;
-
-       /* Free the irqs */
-       for (i = 0; i < priv->num_ntfy_blks; i++) {
-               struct gve_notify_block *block = &priv->ntfy_blocks[i];
-               int msix_idx = i;
-
-               irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
-                                     NULL);
-               free_irq(priv->msix_vectors[msix_idx].vector, block);
-               block->irq = 0;
-       }
-       free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
-       kvfree(priv->ntfy_blocks);
-       priv->ntfy_blocks = NULL;
-       dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
-                         sizeof(*priv->irq_db_indices),
-                         priv->irq_db_indices, priv->irq_db_indices_bus);
-       priv->irq_db_indices = NULL;
-       pci_disable_msix(priv->pdev);
-       kvfree(priv->msix_vectors);
-       priv->msix_vectors = NULL;
-}

 static void gve_free_control_plane_resources(struct gve_priv *priv)
 {
@@ -743,6 +752,7 @@ static void gve_teardown_control_plane_resources(struct gve_priv *priv)

 static void gve_teardown_device(struct gve_priv *priv)
 {
+       gve_teardown_notify_blocks(priv);
Sashiko says:
In gve_remove(), the patch series reordered the teardown sequence to call
destroy_workqueue() before gve_teardown_device().
Because gve_teardown_device() (via gve_teardown_notify_blocks()) is what
calls free_irq() to release the management interrupt, the interrupt remains
active while and after the workqueue is destroyed.
If a management interrupt fires during this window, gve_mgmnt_intr() will
[queue the service task on the work queue].
Can this sequence trigger a use-after-free by enqueuing work on the destroyed
workqueue?

This will be fixed by the disable_work() change in patch 9.

quoted hunk ↗ jump to hunk
        gve_teardown_control_plane_resources(priv);
        gve_adminq_free(priv);
        /*
@@ -2463,13 +2473,16 @@ static int gve_setup_device(struct gve_priv *priv)

        err = gve_alloc_control_plane_resources(priv);
        if (err)
-               goto err;
+               return err;
+
        err = gve_setup_control_plane_resources(priv);
        if (err)
-               goto err;
+               return err;
+
+       err = gve_setup_notify_blocks(priv);
+       if (err)
+               return err;
        return 0;
-err:
-       return err;
 }

 static const struct gve_ctrl_ops gve_adminq_ops = {
@@ -2591,6 +2604,9 @@ int gve_reset(struct gve_priv *priv, bool skip_queue_setup)
                gve_unregister_qpls(priv);
        }

+       gve_teardown_notify_blocks(priv);
+       gve_teardown_control_plane_resources(priv);
+
Sashiko says:
...
The newly added gve_teardown_control_plane_resources(priv) call already
invokes gve_teardown_clock() internally when device_resources_ok is
set, so is the standalone gve_teardown_clock(priv) call further down
now redundant?

Will remove the redundant call.
        /* Reset the device by releasing the AQ. Rings and other resources
         * within the NIC are implicitly destroyed if commands fail.
         */
--
2.54.0.1013.g208068f2d8-goog

-- 

Joshua Washington | Software Engineer | joshwash@google.com | (414) 366-4423
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help