[PATCH v23 18/20] net/mlx5e: NVMEoTCP, async ddp invalidation
From: Aurelien Aptel <hidden>
Date: 2024-02-28 13:00:36
Also in:
linux-nvme
Subsystem:
mellanox ethernet driver (mlx5e), mellanox ethernet innova drivers, mellanox mlx5 core vpi driver, networking drivers, the rest · Maintainers:
Saeed Mahameed, Tariq Toukan, Mark Bloch, Leon Romanovsky, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
From: Ben Ben-Ishay <redacted> After the ULP consumed the buffers of the offloaded request, it calls the ddp_teardown op to release the NIC mapping for them and allow the NIC to reuse the HW contexts associated with offloading this IO. We do a fast/async un-mapping via UMR WQE. In this case, the ULP does holds off with completing the request towards the upper/application layers until the HW unmapping is done. When the corresponding CQE is received, a notification is done via the the teardown_done ddp callback advertised by the ULP in the ddp context. Signed-off-by: Ben Ben-Ishay <redacted> Signed-off-by: Boris Pismenny <borisp@nvidia.com> Signed-off-by: Or Gerlitz <redacted> Signed-off-by: Yoray Zack <redacted> Signed-off-by: Aurelien Aptel <redacted> Reviewed-by: Tariq Toukan <tariqt@nvidia.com> --- .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 4 ++ .../mellanox/mlx5/core/en_accel/nvmeotcp.c | 66 ++++++++++++++++--- .../mellanox/mlx5/core/en_accel/nvmeotcp.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++ 4 files changed, 67 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index c87dca17d5c8..3c124f708afc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h@@ -52,6 +52,7 @@ enum mlx5e_icosq_wqe_type { #endif #ifdef CONFIG_MLX5_EN_NVMEOTCP MLX5E_ICOSQ_WQE_UMR_NVMEOTCP, + MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE, MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP, #endif };
@@ -230,6 +231,9 @@ struct mlx5e_icosq_wqe_info { struct { struct mlx5e_nvmeotcp_queue *queue; } nvmeotcp_q; + struct { + struct mlx5e_nvmeotcp_queue_entry *entry; + } nvmeotcp_qe; #endif }; };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
index dd9c28d60fd3..4c49ff9a7999 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c@@ -142,10 +142,11 @@ build_nvmeotcp_klm_umr(struct mlx5e_nvmeotcp_queue *queue, struct mlx5e_umr_wqe u16 ccid, int klm_entries, u32 klm_offset, u32 len, enum wqe_type klm_type) { - u32 id = (klm_type == KLM_UMR) ? queue->ccid_table[ccid].klm_mkey : - (mlx5e_tir_get_tirn(&queue->tir) << MLX5_WQE_CTRL_TIR_TIS_INDEX_SHIFT); - u8 opc_mod = (klm_type == KLM_UMR) ? MLX5_CTRL_SEGMENT_OPC_MOD_UMR_UMR : - MLX5_OPC_MOD_TRANSPORT_TIR_STATIC_PARAMS; + u32 id = (klm_type == BSF_KLM_UMR) ? + (mlx5e_tir_get_tirn(&queue->tir) << MLX5_WQE_CTRL_TIR_TIS_INDEX_SHIFT) : + queue->ccid_table[ccid].klm_mkey; + u8 opc_mod = (klm_type == BSF_KLM_UMR) ? MLX5_OPC_MOD_TRANSPORT_TIR_STATIC_PARAMS : + MLX5_CTRL_SEGMENT_OPC_MOD_UMR_UMR; u32 ds_cnt = MLX5E_KLM_UMR_DS_CNT(ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)); struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
@@ -158,6 +159,13 @@ build_nvmeotcp_klm_umr(struct mlx5e_nvmeotcp_queue *queue, struct mlx5e_umr_wqe cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); cseg->general_id = cpu_to_be32(id); + if (!klm_entries) { /* this is invalidate */ + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + ucseg->flags = MLX5_UMR_INLINE; + mkc->status = MLX5_MKEY_STATUS_FREE; + return; + } + if (klm_type == KLM_UMR && !klm_offset) { ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_XLT_OCT_SIZE | MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_FREE);
@@ -259,8 +267,8 @@ build_nvmeotcp_static_params(struct mlx5e_nvmeotcp_queue *queue, static void mlx5e_nvmeotcp_fill_wi(struct mlx5e_nvmeotcp_queue *nvmeotcp_queue, - struct mlx5e_icosq *sq, u32 wqebbs, u16 pi, - enum wqe_type type) + struct mlx5e_icosq *sq, u32 wqebbs, + u16 pi, u16 ccid, enum wqe_type type) { struct mlx5e_icosq_wqe_info *wi = &sq->db.wqe_info[pi];
@@ -272,6 +280,10 @@ mlx5e_nvmeotcp_fill_wi(struct mlx5e_nvmeotcp_queue *nvmeotcp_queue, wi->wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP; wi->nvmeotcp_q.queue = nvmeotcp_queue; break; + case KLM_INV_UMR: + wi->wqe_type = MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE; + wi->nvmeotcp_qe.entry = &nvmeotcp_queue->ccid_table[ccid]; + break; default: /* cases where no further action is required upon completion, such as ddp setup */ wi->wqe_type = MLX5E_ICOSQ_WQE_UMR_NVMEOTCP;
@@ -290,7 +302,7 @@ mlx5e_nvmeotcp_rx_post_static_params_wqe(struct mlx5e_nvmeotcp_queue *queue, u32 wqebbs = MLX5E_TRANSPORT_SET_STATIC_PARAMS_WQEBBS; pi = mlx5e_icosq_get_next_pi(sq, wqebbs); wqe = MLX5E_TRANSPORT_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); - mlx5e_nvmeotcp_fill_wi(NULL, sq, wqebbs, pi, BSF_UMR); + mlx5e_nvmeotcp_fill_wi(NULL, sq, wqebbs, pi, 0, BSF_UMR); build_nvmeotcp_static_params(queue, wqe, resync_seq, queue->crc_rx); sq->pc += wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
@@ -307,7 +319,7 @@ mlx5e_nvmeotcp_rx_post_progress_params_wqe(struct mlx5e_nvmeotcp_queue *queue, u wqebbs = MLX5E_NVMEOTCP_PROGRESS_PARAMS_WQEBBS; pi = mlx5e_icosq_get_next_pi(sq, wqebbs); wqe = MLX5E_NVMEOTCP_FETCH_PROGRESS_PARAMS_WQE(sq, pi); - mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, SET_PSV_UMR); + mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, 0, SET_PSV_UMR); build_nvmeotcp_progress_params(queue, wqe, seq); sq->pc += wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
@@ -330,7 +342,7 @@ post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue, wqebbs = DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB); pi = mlx5e_icosq_get_next_pi(sq, wqebbs); wqe = MLX5E_NVMEOTCP_FETCH_KLM_WQE(sq, pi); - mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, wqe_type); + mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, ccid, wqe_type); build_nvmeotcp_klm_umr(queue, wqe, ccid, cur_klm_entries, klm_offset, klm_length, wqe_type); sq->pc += wqebbs;
@@ -345,7 +357,10 @@ mlx5e_nvmeotcp_post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue, enum wqe_type wq struct mlx5e_icosq *sq = &queue->sq; u32 klm_offset = 0, wqes, i; - wqes = DIV_ROUND_UP(klm_length, queue->max_klms_per_wqe); + if (wqe_type == KLM_INV_UMR) + wqes = 1; + else + wqes = DIV_ROUND_UP(klm_length, queue->max_klms_per_wqe); spin_lock_bh(&queue->sq_lock);
@@ -844,12 +859,43 @@ void mlx5e_nvmeotcp_ctx_complete(struct mlx5e_icosq_wqe_info *wi) complete(&queue->static_params_done); } +void mlx5e_nvmeotcp_ddp_inv_done(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_nvmeotcp_queue_entry *q_entry = wi->nvmeotcp_qe.entry; + struct mlx5e_nvmeotcp_queue *queue = q_entry->queue; + struct mlx5_core_dev *mdev = queue->priv->mdev; + struct ulp_ddp_io *ddp = q_entry->ddp; + const struct ulp_ddp_ulp_ops *ulp_ops; + + dma_unmap_sg(mdev->device, ddp->sg_table.sgl, + q_entry->sgl_length, DMA_FROM_DEVICE); + + q_entry->sgl_length = 0; + + ulp_ops = inet_csk(queue->sk)->icsk_ulp_ddp_ops; + if (ulp_ops && ulp_ops->ddp_teardown_done) + ulp_ops->ddp_teardown_done(q_entry->ddp_ctx); +} + static void mlx5e_nvmeotcp_ddp_teardown(struct net_device *netdev, struct sock *sk, struct ulp_ddp_io *ddp, void *ddp_ctx) { + struct mlx5e_nvmeotcp_queue_entry *q_entry; + struct mlx5e_nvmeotcp_queue *queue; + + queue = container_of(ulp_ddp_get_ctx(sk), struct mlx5e_nvmeotcp_queue, ulp_ddp_ctx); + q_entry = &queue->ccid_table[ddp->command_id]; + WARN_ONCE(q_entry->sgl_length == 0, + "Invalidation of empty sgl (CID 0x%x, queue 0x%x)\n", + ddp->command_id, queue->id); + + q_entry->ddp_ctx = ddp_ctx; + q_entry->queue = queue; + + mlx5e_nvmeotcp_post_klm_wqe(queue, KLM_INV_UMR, ddp->command_id, 0); } static void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h
index 8b29f3fde7f2..13817d8a0aae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h@@ -109,6 +109,7 @@ void mlx5e_nvmeotcp_cleanup(struct mlx5e_priv *priv); struct mlx5e_nvmeotcp_queue * mlx5e_nvmeotcp_get_queue(struct mlx5e_nvmeotcp *nvmeotcp, int id); void mlx5e_nvmeotcp_put_queue(struct mlx5e_nvmeotcp_queue *queue); +void mlx5e_nvmeotcp_ddp_inv_done(struct mlx5e_icosq_wqe_info *wi); void mlx5e_nvmeotcp_ctx_complete(struct mlx5e_icosq_wqe_info *wi); static inline void mlx5e_nvmeotcp_init_rx(struct mlx5e_priv *priv) {} void mlx5e_nvmeotcp_cleanup_rx(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index c88c4972f708..89ef317bdd2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c@@ -968,6 +968,9 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) break; #endif #ifdef CONFIG_MLX5_EN_NVMEOTCP + case MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE: + mlx5e_nvmeotcp_ddp_inv_done(wi); + break; case MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP: mlx5e_nvmeotcp_ctx_complete(wi); break;
@@ -1073,6 +1076,9 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq, int budget) #ifdef CONFIG_MLX5_EN_NVMEOTCP case MLX5E_ICOSQ_WQE_UMR_NVMEOTCP: break; + case MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE: + mlx5e_nvmeotcp_ddp_inv_done(wi); + break; case MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP: mlx5e_nvmeotcp_ctx_complete(wi); break;
--
2.34.1