Thread (25 messages) 25 messages, 2 authors, 24d ago

Re: [PATCH net-next v2 12/15] gve: introduce new methods to handle IRQ doorbells

From: Joshua Washington <joshwash@google.com>
Date: 2026-06-07 22:36:12
Also in: bpf, lkml

On Tue, Jun 2, 2026 at 4:59 PM Harshitha Ramamurthy
[off-list ref] wrote:
quoted hunk ↗ jump to hunk
From: Joshua Washington <joshwash@google.com>

Introduce `request_db_info` and `free_db_resources` to
`struct gve_ctrl_ops`. These encapsulate the configuration of device
resources (counter arrays and IRQ doorbell indices) which vary between
Admin Queue and Mailbox modes. All behaviors related to the IRQ doorbell
indices will be managed by these new methods instead of occurring
directly in notify_block setup/teardown methods. Similarly, GQ ring
counters will be managed in `request_db_info`.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jordan Rhee <redacted>
Signed-off-by: Joshua Washington <joshwash@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
 drivers/net/ethernet/google/gve/gve.h        | 12 ++++
 drivers/net/ethernet/google/gve/gve_adminq.c | 71 ++++++++++++++++++++
 drivers/net/ethernet/google/gve/gve_adminq.h |  2 +
 drivers/net/ethernet/google/gve/gve_main.c   | 70 +++++--------------
 4 files changed, 103 insertions(+), 52 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index e173c14bdc08..6db5fbc0b321 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -833,6 +833,8 @@ struct gve_device_info {
  *                  structures stored in @priv to be used during initialization.
  * @set_num_ntfy_blks: Sets no. of vectors into @priv to be used during
  *                     initialization.
+ * @request_db_info: Request and store doorbell information into @priv
+ * @free_db_resources: Free DMA memory holding doorbell info (AdminQ only)
  * @get_ptype_map: Learn packet type map from device and store it in @priv
  * @configure_rss: Set up default RSS configuration
  * @setup_stats_report: Set up DMA region for stats report (AdminQ only)
@@ -843,6 +845,8 @@ struct gve_ctrl_ops {
        void (*unmap_db_bar)(struct gve_priv *priv);
        void (*set_num_queues)(struct gve_priv *priv);
        int (*set_num_ntfy_blks)(struct gve_priv *priv);
+       int (*request_db_info)(struct gve_priv *priv);
+       void (*free_db_resources)(struct gve_priv *priv);
        int (*get_ptype_map)(struct gve_priv *priv);
        int (*configure_rss)(struct gve_priv *priv,
                             struct ethtool_rxfh_param *param);
@@ -1163,6 +1167,11 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
        return (priv->num_ntfy_blks / 2) + queue_idx;
 }

+static inline u32 gve_ntfy_to_msix_idx(struct gve_priv *priv, u32 ntfy_blk_idx)
+{
+       return ntfy_blk_idx;
+}
+
 static inline bool gve_is_qpl(struct gve_priv *priv)
 {
        return priv->queue_format == GVE_GQI_QPL_FORMAT ||
@@ -1375,6 +1384,9 @@ int gve_adjust_queues(struct gve_priv *priv,
                      struct gve_rx_queue_config new_rx_config,
                      struct gve_tx_queue_config new_tx_config,
                      bool reset_rss);
+/* Initialization sequence */
+int gve_alloc_counter_array(struct gve_priv *priv);
+void gve_free_counter_array(struct gve_priv *priv);
 /* flow steering rule */
 int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
 int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index d814108deeef..259dcd617216 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -1719,3 +1719,74 @@ void gve_adminq_set_num_queues(struct gve_priv *priv)
                                                device_info->default_rx_queues,
                                                priv->rx_cfg.num_queues);
 }
+
+int gve_adminq_request_db_info(struct gve_priv *priv)
+{
+       int err;
+       int i;
+
+       /* Alloc dma addrs needed for shm regions */
+       err = gve_alloc_counter_array(priv);
+       if (err) {
+               dev_err(&priv->pdev->dev,
+                       "Failed to alloc db counter array.");
+               return err;
+       }
+
+       priv->irq_db_indices =
+               dma_alloc_coherent(&priv->pdev->dev,
+                                  priv->num_ntfy_blks *
+                                  sizeof(*priv->irq_db_indices),
+                                  &priv->irq_db_indices_bus, GFP_KERNEL);
+       if (!priv->irq_db_indices) {
+               err = -ENOMEM;
+               goto abort_with_counter_array;
+       }
+
+       err = gve_adminq_configure_device_resources(priv,
+                                                   priv->counter_array_bus,
+                                                   priv->num_event_counters,
+                                                   priv->irq_db_indices_bus,
+                                                   priv->num_ntfy_blks);
+       if (unlikely(err)) {
+               dev_err(&priv->pdev->dev,
+                       "could not setup device_resources: err=%d\n", err);
+               err = -ENXIO;
+               goto abort_with_irq_db_indices;
+       }
+
+       for (i = 0; i < priv->num_ntfy_blks; i++)
+               priv->ntfy_blocks[i].irq_db_index =
+                       &priv->irq_db_indices[i].index;
+       return 0;
+
+abort_with_irq_db_indices:
+       dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+                         sizeof(*priv->irq_db_indices),
+                         priv->irq_db_indices, priv->irq_db_indices_bus);
+       priv->irq_db_indices = NULL;
+abort_with_counter_array:
+       gve_free_counter_array(priv);
+       return err;
+}
Sashiko says:
Does this error path introduce a use-after-free window?
If the configure command in gve_adminq_configure_device_resources() times
out (e.g. returns -ETIME), the hardware might still be running and attempting
to access the memory.
Since we immediately free the DMA memory via dma_free_coherent() and
gve_free_counter_array() before the hardware is guaranteed to be stopped, can
the hardware overwrite reallocated kernel memory?

Will move the allocation/free to alloc/free_control_plane_resources.
See the separate concern regarding performing the freeing in
`free_db_resources`.
+
+void gve_adminq_free_db_resources(struct gve_priv *priv)
+{
+       int err;
+
+       /* Log error in deconfigure device, but don't fail. This is only ever
+        * called as a reset is about to be triggered, so it would be redundant
+        * to trigger a reset.
+        */
+       err = gve_adminq_deconfigure_device_resources(priv);
+       if (err)
+               dev_err(&priv->pdev->dev,
+                       "Could not deconfigure device resources: err=%d\n",
+                       err);
+
+       dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+                         sizeof(*priv->irq_db_indices),
+                         priv->irq_db_indices, priv->irq_db_indices_bus);
+       priv->irq_db_indices = NULL;
+       gve_free_counter_array(priv);
+}
Sashiko says:
Could freeing the DMA memory here cause memory corruption?
In an earlier patch in this series ('gve: simplify reset logic'), the teardown
logic was explicitly fixed to delay freeing shared resources until after
gve_adminq_free() triggers a device reset.
However, gve_teardown_device() executes gve_teardown_control_plane_resources()
(which now calls ops->free_db_resources()) before gve_adminq_free().
If the deconfigure command in gve_adminq_deconfigure_device_resources() times
out or fails, the active hardware might continue writing to the freed DMA
memory since the device hasn't been reset yet.

This is a tricky problem. It looks like there is no real choice but to
re-introduce admin queue-specific allocation paths back into
`alloc_control_plane_resources` :/
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index d5f98a80be46..985b1aefea6e 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -657,5 +657,7 @@ int gve_adminq_map_db_bar(struct gve_priv *priv);
 void gve_adminq_unmap_db_bar(struct gve_priv *priv);
 int gve_adminq_set_num_ntfy_blks(struct gve_priv *priv);
 void gve_adminq_set_num_queues(struct gve_priv *priv);
+int gve_adminq_request_db_info(struct gve_priv *priv);
+void gve_adminq_free_db_resources(struct gve_priv *priv);

 #endif /* _GVE_ADMINQ_H */
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 98970508ae54..55f48aee125e 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -179,7 +179,7 @@ static void gve_free_rss_config_cache(struct gve_priv *priv)
        memset(rss_config, 0, sizeof(*rss_config));
 }

-static int gve_alloc_counter_array(struct gve_priv *priv)
+int gve_alloc_counter_array(struct gve_priv *priv)
 {
        priv->counter_array =
                dma_alloc_coherent(&priv->pdev->dev,
@@ -192,7 +192,7 @@ static int gve_alloc_counter_array(struct gve_priv *priv)
        return 0;
 }

-static void gve_free_counter_array(struct gve_priv *priv)
+void gve_free_counter_array(struct gve_priv *priv)
 {
        if (!priv->counter_array)
                return;
@@ -428,15 +428,6 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
 static void gve_free_notify_blocks(struct gve_priv *priv)
 {
        pci_disable_msix(priv->pdev);
-       if (priv->irq_db_indices) {
-               dma_free_coherent(&priv->pdev->dev,
-                                 priv->num_ntfy_blks *
-                                 sizeof(*priv->irq_db_indices),
-                                 priv->irq_db_indices,
-                                 priv->irq_db_indices_bus);
-               priv->irq_db_indices = NULL;
-       }
-
        kvfree(priv->ntfy_blocks);
        priv->ntfy_blocks = NULL;
        kvfree(priv->msix_vectors);
@@ -493,24 +484,14 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
        }

-       priv->irq_db_indices =
-               dma_alloc_coherent(&priv->pdev->dev,
-                                  priv->num_ntfy_blks *
-                                  sizeof(*priv->irq_db_indices),
-                                  &priv->irq_db_indices_bus, GFP_KERNEL);
-       if (!priv->irq_db_indices) {
-               err = -ENOMEM;
-               goto abort;
-       }
-
        priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
                                     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
        if (!priv->ntfy_blocks) {
                err = -ENOMEM;
                goto abort;
        }
-       return 0;

+       return 0;
 abort:
        gve_free_notify_blocks(priv);
        return err;
@@ -525,13 +506,14 @@ static void gve_teardown_notify_blocks(struct gve_priv *priv)

        for (i = 0; i < priv->num_ntfy_blks; i++) {
                struct gve_notify_block *block = &priv->ntfy_blocks[i];
+               int msix_idx = gve_ntfy_to_msix_idx(priv, i);

                if (!block->irq_requested)
                        continue;

-               irq_set_affinity_hint(priv->msix_vectors[i].vector,
+               irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
                                      NULL);
-               free_irq(priv->msix_vectors[i].vector, block);
+               free_irq(priv->msix_vectors[msix_idx].vector, block);
                block->irq = 0;
                block->irq_requested = false;
        }
@@ -560,12 +542,11 @@ static int gve_setup_notify_blocks(struct gve_priv *priv)
        }
        priv->mgmt_irq_requested = true;

-       /* Setup the other blocks - the first n-1 vectors */
        node_mask = gve_get_node_mask(priv);
        cur_cpu = cpumask_first(node_mask);
        for (i = 0; i < priv->num_ntfy_blks; i++) {
                struct gve_notify_block *block = &priv->ntfy_blocks[i];
-               int msix_idx = i;
+               int msix_idx = gve_ntfy_to_msix_idx(priv, i);

                snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
                         i, pci_name(priv->pdev));
@@ -575,14 +556,13 @@ static int gve_setup_notify_blocks(struct gve_priv *priv)
                                  IRQF_NO_AUTOEN, block->name, block);
                if (err) {
                        dev_err(&priv->pdev->dev,
-                               "Failed to receive msix vector %d\n", i);
+                               "Failed to receive msix vector %d\n", msix_idx);
                        goto abort;
                }
                block->irq = priv->msix_vectors[msix_idx].vector;
                block->irq_requested = true;
                irq_set_affinity_and_hint(block->irq,
                                          cpumask_of(cur_cpu));
-               block->irq_db_index = &priv->irq_db_indices[i].index;

                cur_cpu = cpumask_next(cur_cpu, node_mask);
                /* Wrap once CPUs in the node have been exhausted, or when
@@ -599,7 +579,6 @@ static int gve_setup_notify_blocks(struct gve_priv *priv)
        return err;
 }

-
 static void gve_free_control_plane_resources(struct gve_priv *priv)
 {
        bitmap_free(priv->xsk_pools);
@@ -608,9 +587,8 @@ static void gve_free_control_plane_resources(struct gve_priv *priv)
        kvfree(priv->ptype_lut_dqo);
        priv->ptype_lut_dqo = NULL;

-       gve_free_stats_report(priv);
        gve_free_notify_blocks(priv);
-       gve_free_counter_array(priv);
+       gve_free_stats_report(priv);
        gve_free_rss_config_cache(priv);
        gve_free_flow_rule_caches(priv);
 }
@@ -623,9 +601,6 @@ static int gve_alloc_control_plane_resources(struct gve_priv *priv)
        if (err)
                return err;
        err = gve_alloc_rss_config_cache(priv);
-       if (err)
-               goto abort;
-       err = gve_alloc_counter_array(priv);
        if (err)
                goto abort;
        err = gve_alloc_notify_blocks(priv);
@@ -661,15 +636,9 @@ static int gve_setup_control_plane_resources(struct gve_priv *priv)
        const struct gve_ctrl_ops *ops = priv->ctrl_ops;
        int err;

-       err = gve_adminq_configure_device_resources(priv,
-                                                   priv->counter_array_bus,
-                                                   priv->num_event_counters,
-                                                   priv->irq_db_indices_bus,
-                                                   priv->num_ntfy_blks);
-       if (unlikely(err)) {
-               dev_err(&priv->pdev->dev,
-                       "could not setup device_resources: err=%d\n", err);
-               err = -ENXIO;
+       err = ops->request_db_info(priv);
+       if (err) {
+               dev_err(&priv->pdev->dev, "Failed to get db info");
                return err;
        }
@@ -678,7 +647,7 @@ static int gve_setup_control_plane_resources(struct gve_priv *priv)
                if (err) {
                        dev_err(&priv->pdev->dev,
                                "Failed to get ptype map: err=%d\n", err);
-                       goto deconfigure_device;
+                       goto free_db_resources;
                }
        }
@@ -708,8 +677,8 @@ static int gve_setup_control_plane_resources(struct gve_priv *priv)

 teardown_clock:
        gve_teardown_clock(priv);
-deconfigure_device:
-       gve_adminq_deconfigure_device_resources(priv);
+free_db_resources:
+       ops->free_db_resources(priv);
        return err;
 }
@@ -739,12 +708,7 @@ static void gve_teardown_control_plane_resources(struct gve_priv *priv)
                        dev_err(&priv->pdev->dev,
                                "Failed to detach stats report: err=%d\n", err);
                gve_teardown_clock(priv);
-
-               err = gve_adminq_deconfigure_device_resources(priv);
-               if (err)
-                       dev_err(&priv->pdev->dev,
-                               "Could not deconfigure device resources: err=%d\n",
-                               err);
+               ops->free_db_resources(priv);
        }

        gve_clear_device_resources_ok(priv);
@@ -2494,6 +2458,8 @@ static const struct gve_ctrl_ops gve_adminq_ops = {
        .reset_flow_rules       = gve_adminq_reset_flow_rules,
        .setup_stats_report     = gve_adminq_report_stats,
        .configure_rss          = gve_adminq_configure_rss,
+       .request_db_info        = gve_adminq_request_db_info,
+       .free_db_resources      = gve_adminq_free_db_resources,
 };

 static int gve_init_priv(struct gve_priv *priv)
--
2.54.0.1013.g208068f2d8-goog

-- 

Joshua Washington | Software Engineer | joshwash@google.com | (414) 366-4423
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help