Re: [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA
From: Sagi Grimberg <sagi@grimberg.me>
Date: 2021-11-22 11:40:51
On 11/22/21 12:25 PM, Varun Prakash wrote:
quoted hunk ↗ jump to hunk
As per NVMe/TCP specification (revision 1.0a, section 3.6.2.3) Maximum Host to Controller Data length (MAXH2CDATA): Specifies the maximum number of PDU-Data bytes per H2CData PDU in bytes. This value is a multiple of dwords and should be no less than 4,096. Current code sets H2CData PDU data_length to r2t_length, it does not check MAXH2CDATA value. Fix this by setting H2CData PDU data_length to min(req->h2cdata_left, queue->maxh2cdata). Also validate MAXH2CDATA value returned by target in ICResp PDU, if it is not a multiple of dword or if it is less than 4096 return -EINVAL from nvme_tcp_init_connection(). Signed-off-by: Varun Prakash <varun@chelsio.com> --- v2: removed nvme_tcp_update_h2c_data_pdu() used sock_no_sendpage() instead of kernel_sendmsg() drivers/nvme/host/tcp.c | 41 +++++++++++++++++++++++++++++++++++------ include/linux/nvme-tcp.h | 1 + 2 files changed, 36 insertions(+), 6 deletions(-)diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 92e07d2..82f8926 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c@@ -97,6 +97,7 @@ struct nvme_tcp_queue { struct nvme_tcp_request *request; int queue_size; + u32 maxh2cdata; size_t cmnd_capsule_len; struct nvme_tcp_ctrl *ctrl; unsigned long flags;@@ -582,7 +583,7 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req) u8 hdgst = nvme_tcp_hdgst_len(queue); u8 ddgst = nvme_tcp_ddgst_len(queue); - req->pdu_len = req->h2cdata_left; + req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata); req->pdu_sent = 0; memset(data, 0, sizeof(*data));@@ -933,6 +934,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; int req_data_len = req->data_len; + u32 h2cdata_left = req->h2cdata_left;
No need for the local variable, the reference only happens when the pdu data transfer is completed.
quoted hunk ↗ jump to hunk
while (true) { struct page *page = nvme_tcp_req_cur_page(req);@@ -977,7 +979,13 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) req->state = NVME_TCP_SEND_DDGST; req->offset = 0; } else { - nvme_tcp_done_send_req(queue); + if (h2cdata_left) { + nvme_tcp_setup_h2c_data_pdu(req); + req->state = NVME_TCP_SEND_H2C_PDU; + req->offset = 0;
req->state and req->offset setting can move to nvme_tcp_setup_h2c_data_pdu() as commented in patch 1.
quoted hunk ↗ jump to hunk
+ } else { + nvme_tcp_done_send_req(queue); + } } return 1; }@@ -1028,16 +1036,21 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_data_pdu *pdu = req->pdu; + struct page *page = virt_to_page(pdu); + size_t offset = offset_in_page(pdu) + req->offset; u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) - req->offset + hdgst; + int flags = MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST; int ret;
Not sure the local variables are needed, but OK, I'm fine with that.
quoted hunk ↗ jump to hunk
if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); - ret = kernel_sendpage(queue->sock, virt_to_page(pdu), - offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); + if (req->h2cdata_left) + ret = sock_no_sendpage(queue->sock, page, offset, len, flags); + else + ret = kernel_sendpage(queue->sock, page, offset, len, flags); + if (unlikely(ret <= 0)) return ret;@@ -1057,6 +1070,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; size_t offset = req->offset; + u32 h2cdata_left = req->h2cdata_left;
No need for local variable.
quoted hunk ↗ jump to hunk
int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = {@@ -1074,7 +1088,13 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) return ret; if (offset + ret == NVME_TCP_DIGEST_LENGTH) { - nvme_tcp_done_send_req(queue); + if (h2cdata_left) { + nvme_tcp_setup_h2c_data_pdu(req); + req->state = NVME_TCP_SEND_H2C_PDU; + req->offset = 0; + } else { + nvme_tcp_done_send_req(queue); + } return 1; }@@ -1260,6 +1280,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) struct msghdr msg = {}; struct kvec iov; bool ctrl_hdgst, ctrl_ddgst; + u32 maxh2cdata; int ret; icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);@@ -1343,6 +1364,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) goto free_icresp; } + maxh2cdata = le32_to_cpu(icresp->maxdata); + if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) { + pr_err("queue %d: invalid maxh2cdata returned %u\n", + nvme_tcp_queue_id(queue), maxh2cdata); + goto free_icresp; + } + queue->maxh2cdata = maxh2cdata; + ret = 0; free_icresp: kfree(icresp);diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 959e0bd..7547015 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h@@ -12,6 +12,7 @@ #define NVME_TCP_DISC_PORT 8009 #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 +#define NVME_TCP_MIN_MAXH2CDATA 4096 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0,