[PATCH net-next] net: mana: Add handler for hardware servicing events
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: 2025-05-07 15:59:28
Also in:
bpf, linux-rdma, lkml, netdev
Subsystem:
hyper-v/azure core and drivers, networking drivers, networking [general], the rest · Maintainers:
"K. Y. Srinivasan", Haiyang Zhang, Wei Liu, Dexuan Cui, Long Li, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
To collaborate with hardware servicing events, upon receiving the special
EQE notification from the HW channel, remove the devices on this bus.
Then, after a waiting period based on the device specs, rescan the parent
bus to recover the devices.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 61 +++++++++++++++++++
 include/net/mana/gdma.h                       |  5 +-
 2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 4ffaf7588885..aa2ccf4d0ec6 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -352,11 +352,72 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 }
 EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
 
+/* Seconds to wait after removing the devices before rescanning the bus. */
+#define MANA_SERVICE_PERIOD 10
+
+/* Deferred-work context for one hardware servicing event. */
+struct mana_serv_work {
+	struct work_struct serv_work;
+	struct pci_dev *pdev;	/* reference held until mana_serv_func() ends */
+};
+
+/*
+ * Workqueue handler for a hardware servicing event: stop and remove
+ * bus->self for the bus the MANA device sits on, sleep for
+ * MANA_SERVICE_PERIOD seconds, then rescan the parent bus.
+ *
+ * The PCI rescan/remove lock is held across the whole sequence so that the
+ * bus pointers read here are not torn down by a concurrent remove/rescan
+ * while this worker sleeps. Drops the device reference taken when the work
+ * was queued and frees the work item.
+ */
+static void mana_serv_func(struct work_struct *w)
+{
+	struct mana_serv_work *mns_wk;
+	struct pci_bus *bus, *parent;
+	struct pci_dev *pdev;
+
+	mns_wk = container_of(w, struct mana_serv_work, serv_work);
+	pdev = mns_wk->pdev;
+
+	pci_lock_rescan_remove();
+
+	if (!pdev)
+		goto out;
+
+	bus = pdev->bus;
+	if (!bus) {
+		dev_err(&pdev->dev, "MANA service: no bus\n");
+		goto out;
+	}
+
+	parent = bus->parent;
+	if (!parent) {
+		dev_err(&pdev->dev, "MANA service: no parent bus\n");
+		goto out;
+	}
+
+	/* Lock already held, so use the non-locking variant. */
+	pci_stop_and_remove_bus_device(bus->self);
+
+	msleep(MANA_SERVICE_PERIOD * 1000);
+
+	pci_rescan_bus(parent);
+
+out:
+	pci_unlock_rescan_remove();
+
+	/* pci_dev_put() accepts NULL. */
+	pci_dev_put(pdev);
+	kfree(mns_wk);
+}
+
 static void mana_gd_process_eqe(struct gdma_queue *eq)
 {
 	u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
 	struct gdma_context *gc = eq->gdma_dev->gdma_context;
 	struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
+	struct mana_serv_work *mns_wk;
 	union gdma_eqe_info eqe_info;
 	enum gdma_eqe_type type;
 	struct gdma_event event;
@@ -400,6 +461,29 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 		eq->eq.callback(eq->eq.context, eq, &event);
 		break;
 
+	case GDMA_EQE_HWC_FPGA_RECONFIG:
+	case GDMA_EQE_HWC_SOCMANA_CRASH:
+		/* Ignore further servicing events once one has been queued. */
+		if (gc->in_service) {
+			dev_info(gc->dev, "Already in service\n");
+			break;
+		}
+
+		/* The remove/rescan sleeps, so defer it to a workqueue. */
+		mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
+		if (!mns_wk) {
+			dev_err(gc->dev, "Fail to alloc mana_serv_work\n");
+			break;
+		}
+
+		dev_info(gc->dev, "Start MANA service\n");
+		gc->in_service = true;
+		/* Pin the device; mana_serv_func() drops the reference. */
+		mns_wk->pdev = pci_dev_get(to_pci_dev(gc->dev));
+		INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+		schedule_work(&mns_wk->serv_work);
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 228603bf03f2..13cfbcf67815 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h@@ -58,8 +58,9 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, GDMA_EQE_HWC_INIT_DATA = 130, GDMA_EQE_HWC_INIT_DONE = 131, - GDMA_EQE_HWC_SOC_RECONFIG = 132, + GDMA_EQE_HWC_FPGA_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_HWC_SOCMANA_CRASH = 135, GDMA_EQE_RNIC_QP_FATAL = 176, };
@@ -388,6 +389,8 @@ struct gdma_context { u32 test_event_eq_id; bool is_pf; + bool in_service; + phys_addr_t bar0_pa; void __iomem *bar0_va; void __iomem *shm_base;
--
2.34.1