Re: [PATCH net-next v2 11/15] gve: split up notify block allocation and setup paths
From: Joshua Washington <joshwash@google.com>
Date: 2026-06-07 22:29:50
Also in:
bpf, lkml
On Tue, Jun 2, 2026 at 4:59 PM Harshitha Ramamurthy [off-list ref] wrote:
quoted hunk ↗ jump to hunk
From: Joshua Washington <joshwash@google.com> Before this patch, notify block allocation and setup occurred in the same method. This all occurred before gve_adminq_configure_device_resources, which populates the irq_db_indicies array, a DMA region with BAR offsets for MSI-X vectors. The coming mailbox mode will require notify blocks to be set up only after receiving the IRQ doorbell offsets, as the request does not work with a supplied DMA buffer in the way that admin queue mode does. The intended flow in that case would be: 1) allocate notify blocks 2) request doorbell information 3) set up MSI-X vectors based on doorbell info This ordering also works for admin queue mode, so it will be updated to match. Reviewed-by: Willem de Bruijn <willemb@google.com> Reviewed-by: Jordan Rhee <redacted> Signed-off-by: Joshua Washington <joshwash@google.com> Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com> --- drivers/net/ethernet/google/gve/gve.h | 2 + drivers/net/ethernet/google/gve/gve_main.c | 158 ++++++++++++--------- 2 files changed, 89 insertions(+), 71 deletions(-)diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 939b245ed562..e173c14bdc08 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h@@ -673,6 +673,7 @@ struct gve_notify_block { struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ u32 irq; + bool irq_requested; }; /* Tracks allowed and current rx queue settings */@@ -953,6 +954,7 @@ struct gve_priv { u64 link_speed; bool up_before_suspend; /* True if dev was up before suspend */ + bool mgmt_irq_requested; struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 06df6d8ad429..98970508ae54 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c@@ -425,6 +425,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) return work_done; } +static void gve_free_notify_blocks(struct gve_priv *priv) +{ + pci_disable_msix(priv->pdev); + if (priv->irq_db_indices) { + dma_free_coherent(&priv->pdev->dev, + priv->num_ntfy_blks * + sizeof(*priv->irq_db_indices), + priv->irq_db_indices, + priv->irq_db_indices_bus); + priv->irq_db_indices = NULL; + } + + kvfree(priv->ntfy_blocks); + priv->ntfy_blocks = NULL; + kvfree(priv->msix_vectors); + priv->msix_vectors = NULL; +} + static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) { if (priv->numa_node == NUMA_NO_NODE)@@ -436,11 +454,9 @@ static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) static int gve_alloc_notify_blocks(struct gve_priv *priv) { int num_vecs_requested = priv->num_ntfy_blks + 1; - const struct cpumask *node_mask; - unsigned int cur_cpu; int vecs_enabled; - int i, j; int err; + int i; priv->msix_vectors = kvzalloc_objs(*priv->msix_vectors, num_vecs_requested);@@ -454,7 +470,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", GVE_MIN_MSIX, vecs_enabled); err = vecs_enabled; - goto abort_with_msix_vectors; + goto abort; } if (vecs_enabled != num_vecs_requested) { int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;@@ -477,15 +493,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; } - /* Setup Management Vector - the last vector */ - snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", - pci_name(priv->pdev)); - err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, - gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); - if (err) { - dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); - goto abort_with_msix_enabled; - } priv->irq_db_indices = dma_alloc_coherent(&priv->pdev->dev, priv->num_ntfy_blks *@@ -493,15 +500,65 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) &priv->irq_db_indices_bus, GFP_KERNEL); if (!priv->irq_db_indices) { err = -ENOMEM; - goto abort_with_mgmt_vector; + goto abort; } priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), GFP_KERNEL); if (!priv->ntfy_blocks) { err = -ENOMEM; - goto abort_with_irq_db_indices; + goto abort; + } + return 0; + +abort: + gve_free_notify_blocks(priv); + return err; +} + +static void gve_teardown_notify_blocks(struct gve_priv *priv) +{ + int i; + + if (!priv->ntfy_blocks) + return; + + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + + if (!block->irq_requested) + continue; + + irq_set_affinity_hint(priv->msix_vectors[i].vector, + NULL); + free_irq(priv->msix_vectors[i].vector, block); + block->irq = 0; + block->irq_requested = false; + } + + if (priv->mgmt_irq_requested) { + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); + priv->mgmt_irq_requested = false; + } +} + +static int gve_setup_notify_blocks(struct gve_priv *priv) +{ + const struct cpumask *node_mask; + unsigned int cur_cpu; + int i; + int err; + + /* Setup Management Vector - the last vector */ + snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), + "gve-mgmnt@pci:%s", pci_name(priv->pdev)); + err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, + gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); + if (err) { + dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); + return err; } + priv->mgmt_irq_requested = true; /* Setup the other blocks - the first n-1 vectors */ node_mask = gve_get_node_mask(priv);@@ -519,9 +576,10 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) if (err) { dev_err(&priv->pdev->dev, "Failed to receive msix vector %d\n", i); - goto abort_with_some_ntfy_blocks; + goto abort; } block->irq = priv->msix_vectors[msix_idx].vector; + block->irq_requested = true; irq_set_affinity_and_hint(block->irq, cpumask_of(cur_cpu)); block->irq_db_index = &priv->irq_db_indices[i].index;@@ -535,61 +593,12 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) cur_cpu = cpumask_first(node_mask); } return 0; -abort_with_some_ntfy_blocks: - for (j = 0; j < i; j++) { - struct gve_notify_block *block = &priv->ntfy_blocks[j]; - int msix_idx = j; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); - block->irq = 0; - } - kvfree(priv->ntfy_blocks); - priv->ntfy_blocks = NULL; -abort_with_irq_db_indices: - dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * - sizeof(*priv->irq_db_indices), - priv->irq_db_indices, priv->irq_db_indices_bus); - priv->irq_db_indices = NULL; -abort_with_mgmt_vector: - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); -abort_with_msix_enabled: - pci_disable_msix(priv->pdev); -abort_with_msix_vectors: - kvfree(priv->msix_vectors); - priv->msix_vectors = NULL; +abort: + gve_teardown_notify_blocks(priv); return err; } -static void gve_free_notify_blocks(struct gve_priv *priv) -{ - int i; - - if (!priv->msix_vectors) - return; - - /* Free the irqs */ - for (i = 0; i < priv->num_ntfy_blks; i++) { - struct gve_notify_block *block = &priv->ntfy_blocks[i]; - int msix_idx = i; - - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); - block->irq = 0; - } - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); - kvfree(priv->ntfy_blocks); - priv->ntfy_blocks = NULL; - dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * - sizeof(*priv->irq_db_indices), - priv->irq_db_indices, priv->irq_db_indices_bus); - priv->irq_db_indices = NULL; - pci_disable_msix(priv->pdev); - kvfree(priv->msix_vectors); - priv->msix_vectors = NULL; -} static void gve_free_control_plane_resources(struct gve_priv *priv) {@@ -743,6 +752,7 @@ static void gve_teardown_control_plane_resources(struct gve_priv *priv) static void gve_teardown_device(struct gve_priv *priv) { + gve_teardown_notify_blocks(priv);
Sashiko says: In gve_remove(), the patch series reordered the teardown sequence to call destroy_workqueue() before gve_teardown_device(). Because gve_teardown_device() (via gve_teardown_notify_blocks()) is what calls free_irq() to release the management interrupt, the interrupt remains active while and after the workqueue is destroyed. If a management interrupt fires during this window, gve_mgmnt_intr() will [queue the service task on the work queue]. Can this sequence trigger a use-after-free by enqueuing work on the destroyed workqueue? This will be fixed by the disable_work() change in patch 9.
quoted hunk ↗ jump to hunk
gve_teardown_control_plane_resources(priv); gve_adminq_free(priv); /*@@ -2463,13 +2473,16 @@ static int gve_setup_device(struct gve_priv *priv) err = gve_alloc_control_plane_resources(priv); if (err) - goto err; + return err; + err = gve_setup_control_plane_resources(priv); if (err) - goto err; + return err; + + err = gve_setup_notify_blocks(priv); + if (err) + return err; return 0; -err: - return err; } static const struct gve_ctrl_ops gve_adminq_ops = {@@ -2591,6 +2604,9 @@ int gve_reset(struct gve_priv *priv, bool skip_queue_setup) gve_unregister_qpls(priv); } + gve_teardown_notify_blocks(priv); + gve_teardown_control_plane_resources(priv); +
Sashiko says: ... The newly added gve_teardown_control_plane_resources(priv) call already invokes gve_teardown_clock() internally when device_resources_ok is set, so is the standalone gve_teardown_clock(priv) call further down now redundant? Will remove the redundant call.
/* Reset the device by releasing the AQ. Rings and other resources
* within the NIC are implicitly destroyed if commands fail.
*/
--
2.54.0.1013.g208068f2d8-goog-- Joshua Washington | Software Engineer | joshwash@google.com | (414) 366-4423