Thread: 31 messages, 5 authors, 2025-02-03

RE: [PATCH rdma-next 09/13] RDMA/mana_ib: UD/GSI work requests

From: Long Li <longli@microsoft.com>
Date: 2025-01-23 18:20:36
Also in: linux-hyperv, linux-rdma, lkml

quoted hunk ↗ jump to hunk
-----Original Message-----
From: Konstantin Taranov <redacted>
Sent: Monday, January 20, 2025 9:27 AM
To: Konstantin Taranov <kotaranov@microsoft.com>; Shiraz Saleem
[off-list ref]; pabeni@redhat.com; Haiyang Zhang
[off-list ref]; KY Srinivasan [off-list ref];
edumazet@google.com; kuba@kernel.org; davem@davemloft.net; Dexuan Cui
[off-list ref]; wei.liu@kernel.org; sharmaajay@microsoft.com; Long
Li [off-list ref]; jgg@ziepe.ca; leon@kernel.org
Cc: linux-rdma@vger.kernel.org; linux-kernel@vger.kernel.org;
netdev@vger.kernel.org; linux-hyperv@vger.kernel.org
Subject: [PATCH rdma-next 09/13] RDMA/mana_ib: UD/GSI work requests

From: Konstantin Taranov <kotaranov@microsoft.com>

Implement post send and post recv for UD/GSI QPs.
Add information about posted requests into shadow queues.

Co-developed-by: Shiraz Saleem <redacted>
Signed-off-by: Shiraz Saleem <redacted>
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
---
 drivers/infiniband/hw/mana/Makefile           |   2 +-
 drivers/infiniband/hw/mana/device.c           |   2 +
 drivers/infiniband/hw/mana/mana_ib.h          |  33 ++++
 drivers/infiniband/hw/mana/qp.c               |  21 ++-
 drivers/infiniband/hw/mana/shadow_queue.h     | 115 ++++++++++++
 drivers/infiniband/hw/mana/wr.c               | 168 ++++++++++++++++++
 .../net/ethernet/microsoft/mana/gdma_main.c   |   2 +
 7 files changed, 341 insertions(+), 2 deletions(-)  create mode 100644
drivers/infiniband/hw/mana/shadow_queue.h
 create mode 100644 drivers/infiniband/hw/mana/wr.c
diff --git a/drivers/infiniband/hw/mana/Makefile
b/drivers/infiniband/hw/mana/Makefile
index 6e56f77..79426e7 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o

-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o
diff --git a/drivers/infiniband/hw/mana/device.c
b/drivers/infiniband/hw/mana/device.c
index d534ef1..1da86c3 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -40,6 +40,8 @@ static const struct ib_device_ops mana_ib_dev_ops = {
 	.mmap = mana_ib_mmap,
 	.modify_qp = mana_ib_modify_qp,
 	.modify_wq = mana_ib_modify_wq,
+	.post_recv = mana_ib_post_recv,
+	.post_send = mana_ib_post_send,
 	.query_device = mana_ib_query_device,
 	.query_gid = mana_ib_query_gid,
 	.query_pkey = mana_ib_query_pkey,
diff --git a/drivers/infiniband/hw/mana/mana_ib.h
b/drivers/infiniband/hw/mana/mana_ib.h
index 7b079d8..6265c39 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -14,6 +14,7 @@
 #include <linux/dmapool.h>

 #include <net/mana/mana.h>
+#include "shadow_queue.h"

 #define PAGE_SZ_BM                                                             \
 	(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K |        \
@@ -165,6 +166,9 @@ struct mana_ib_qp {
 	/* The port on the IB device, starting with 1 */
 	u32 port;

+	struct shadow_queue shadow_rq;
+	struct shadow_queue shadow_sq;
+
 	refcount_t		refcount;
 	struct completion	free;
 };
@@ -404,6 +408,30 @@ struct mana_rnic_set_qp_state_resp {
 	struct gdma_resp_hdr hdr;
 }; /* HW Data */

+enum WQE_OPCODE_TYPES {
+	WQE_TYPE_UD_SEND = 0,
+	WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+	u32 wqe_type	: 5;
+	u32 fence	: 1;
+	u32 signaled	: 1;
+	u32 solicited	: 1;
+	u32 psn		: 24;
+
+	u32 ssn_or_rqpn	: 24;
+	u32 reserved1	: 8;
+	union {
+		struct {
+			u32 remote_qkey;
+			u32 immediate;
+			u32 reserved1;
+			u32 reserved2;
+		} ud_send;
+	};
+}; /* HW DATA */
+
 static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)  {
 	return mdev->gdma_dev->gdma_context;
@@ -562,4 +590,9 @@ int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev
*mdev, struct mana_ib_qp *qp);  int mana_ib_create_ah(struct ib_ah *ibah,
struct rdma_ah_init_attr *init_attr,
 		      struct ib_udata *udata);
 int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr); int
mana_ib_post_send(struct
+ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr);
 #endif
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index fea45be..051ea03 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -562,10 +562,23 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp,
struct ib_pd *ibpd,
 	}
 	doorbell = gc->mana_ib.doorbell;

+	err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+				  sizeof(struct ud_rq_shadow_wqe));
+	if (err) {
+		ibdev_err(&mdev->ib_dev, "Failed to create shadow rq
err %d\n", err);
+		goto destroy_queues;
+	}
+	err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+				  sizeof(struct ud_sq_shadow_wqe));
+	if (err) {
+		ibdev_err(&mdev->ib_dev, "Failed to create shadow sq
err %d\n", err);
+		goto destroy_shadow_queues;
+	}
+
 	err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
 	if (err) {
 		ibdev_err(&mdev->ib_dev, "Failed to create ud qp  %d\n", err);
-		goto destroy_queues;
+		goto destroy_shadow_queues;
 	}
 	qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
 	qp->port = attr->port_num;
@@ -575,6 +588,9 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp,
struct ib_pd *ibpd,

 	return 0;

+destroy_shadow_queues:
+	destroy_shadow_queue(&qp->shadow_rq);
+	destroy_shadow_queue(&qp->shadow_sq);
 destroy_queues:
 	while (i-- > 0)
 		mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]); @@ -
754,6 +770,9 @@ static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp,
struct ib_udata *udata)
 		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
 	int i;

+	destroy_shadow_queue(&qp->shadow_rq);
+	destroy_shadow_queue(&qp->shadow_sq);
+
 	/* Ignore return code as there is not much we can do about it.
 	 * The error message is printed inside.
 	 */
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h
b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 0000000..d8bfb4c
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+	u16 opcode;
+	u16 error_code;
+	u32 posted_wqe_size;
+	u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+	struct shadow_wqe_header header;
+	u32 byte_len;
+	u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+	struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+	/* Unmasked producer index, Incremented on wqe posting */
+	u64 prod_idx;
+	/* Unmasked consumer index, Incremented on cq polling */
+	u64 cons_idx;
+	/* Unmasked index of next-to-complete (from HW) shadow WQE */
+	u64 next_to_complete_idx;
+	/* queue size in wqes */
+	u32 length;
+	/* distance between elements in bytes */
+	u32 stride;
+	/* ring buffer holding wqes */
+	void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue,
+uint32_t length, uint32_t stride) {
+	queue->buffer = kvmalloc(length * stride, GFP_KERNEL);
+	if (!queue->buffer)
+		return -ENOMEM;
+
+	queue->length = length;
+	queue->stride = stride;
+
+	return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue) {
+	kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue) {
+	return (queue->prod_idx - queue->cons_idx) >= queue->length; }
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue) {
+	return queue->prod_idx == queue->cons_idx; }
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64
+unmasked_index) {
+	u32 index = unmasked_index % queue->length;
+
+	return ((u8 *)queue->buffer + index * queue->stride); }
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue) {
+	return shadow_queue_get_element(queue, queue->prod_idx); }
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue) {
+	if (queue->cons_idx == queue->next_to_complete_idx)
+		return NULL;
+
+	return shadow_queue_get_element(queue, queue->cons_idx); }
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue) {
+	if (queue->next_to_complete_idx == queue->prod_idx)
+		return NULL;
+
+	return shadow_queue_get_element(queue, queue->next_to_complete_idx); }
+
+static inline void shadow_queue_advance_producer(struct shadow_queue
+*queue) {
+	queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue
+*queue) {
+	queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct
+shadow_queue *queue) {
+	queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644 index 0000000..1813567
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct
+ib_recv_wr *wr) {
+	struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct
mana_ib_dev, ib_dev);
+	struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+	struct gdma_posted_wqe_info wqe_info = {0};
+	struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+	struct gdma_wqe_request wqe_req = {0};
+	struct ud_rq_shadow_wqe *shadow_wqe;
+	int err, i;
+
+	if (shadow_queue_full(&qp->shadow_rq))
+		return -EINVAL;
+
+	if (wr->num_sge > MAX_WR_SGL_NUM)
+		return -EINVAL;
+
+	for (i = 0; i < wr->num_sge; ++i) {
+		gdma_sgl[i].address = wr->sg_list[i].addr;
+		gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+		gdma_sgl[i].size = wr->sg_list[i].length;
+	}
+	wqe_req.num_sge = wr->num_sge;
+	wqe_req.sgl = gdma_sgl;
+
+	err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+	if (err)
+		return err;
+
+	shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+	memset(shadow_wqe, 0, sizeof(*shadow_wqe));
I would avoid using memset here, since this is on the data path.

The patch looks good otherwise.

Long
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help