Re: [PATCH v3 6/8] nvme-tcp: Support KeyUpdate

From: Hannes Reinecke <hare@suse.de>
Date: 2025-10-06 06:34:36
Also in: linux-doc, linux-nfs, linux-nvme, lkml

On 10/3/25 06:31, alistair23@gmail.com wrote:

From: Alistair Francis <redacted>

If the nvme_tcp_try_send() or nvme_tcp_try_recv() functions return
EKEYEXPIRED then the underlying TLS keys need to be updated. This occurs
on an KeyUpdate event.

If the NVMe Target (TLS server) initiates a KeyUpdate this patch will
allow the NVMe layer to process the KeyUpdate request and forward the
request to userspace. Userspace must then update the key to keep the
connection alive.

This patch allows us to handle the NVMe target sending a KeyUpdate
request without aborting the connection. At this time we don't support
initiating a KeyUpdate.

Link: https://datatracker.ietf.org/doc/html/rfc8446#section-4.6.3
Signed-off-by: Alistair Francis <redacted>
---
v3:
  - Don't cancel existing handshake requests
v2:
  - Don't change the state
  - Use a helper function for KeyUpdates
  - Continue sending in nvme_tcp_send_all() after a KeyUpdate
  - Remove command message using recvmsg

  drivers/nvme/host/tcp.c | 60 ++++++++++++++++++++++++++++++++++++-----
  1 file changed, 54 insertions(+), 6 deletions(-)

Weelll ... Checking the code the network stack will only return
-EKEYEXPIRED on recvmsg() (as it parses the TLS message on receive).
So really this is handling an incoming KeyUpdate request only.
And I would keep it that way, as initiating a KeyUpdate request
is quite different (conceptually) than handling an incoming one.

quoted hunk ↗ jump to hunk

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b07401ad68eb..4f27319f0078 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c

@@ -172,6 +172,7 @@ struct nvme_tcp_queue {
  	bool			tls_enabled;
  	u32			rcv_crc;
  	u32			snd_crc;
+	key_serial_t		user_session_id;
  	__le32			exp_ddgst;
  	__le32			recv_ddgst;
  	struct completion       tls_complete;

@@ -211,6 +212,7 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
  			      struct nvme_tcp_queue *queue,
  			      key_serial_t pskid,
  			      handshake_key_update_type keyupdate);
+static void update_tls_keys(struct nvme_tcp_queue *queue);
  
  static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
  {

@@ -394,6 +396,14 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
  	do {
  		ret = nvme_tcp_try_send(queue);
  	} while (ret > 0);
+
+	if (ret == -EKEYEXPIRED) {
+		update_tls_keys(queue);
+
+		do {
+			ret = nvme_tcp_try_send(queue);
+		} while (ret > 0);
+	}
  }

See above. I'd rather have two patches, one for handling incoming
KeyUpdate message (where we'd see an EKEYEXPIRED on receive), and
another one for initiating KeyUpdates.

quoted hunk ↗ jump to hunk

  static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)

@@ -1346,6 +1356,8 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
  done:
  	if (ret == -EAGAIN) {
  		ret = 0;
+	} else if (ret == -EKEYEXPIRED) {
+		goto out;
  	} else if (ret < 0) {
  		dev_err(queue->ctrl->ctrl.device,
  			"failed to send request %d\n", ret);

See above.

quoted hunk ↗ jump to hunk

@@ -1381,17 +1393,45 @@ static int nvme_tcp_try_recvmsg(struct nvme_tcp_queue *queue)
  		}
  	} while (result >= 0);
  
-	if (result < 0 && result != -EAGAIN) {
+	if (result == -EKEYEXPIRED) {
+		return -EKEYEXPIRED;
+	} else if (result == -EAGAIN) {
+		result = 0;
+	} else if (result < 0) {
  		dev_err(queue->ctrl->ctrl.device,
  			"receive failed:  %d\n", result);
  		queue->rd_enabled = false;
  		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
-	} else if (result == -EAGAIN)
-		result = 0;
+	}
  
  	return result < 0 ? result : (queue->nr_cqe = nr_cqe);
  }
  

Remind me again to resend my tcp_recvmsg patch. The blanking out
of -EAGAIN is actually wrong.

+static void update_tls_keys(struct nvme_tcp_queue *queue)
+{
+	int qid = nvme_tcp_queue_id(queue);
+	int ret;
+
+	dev_dbg(queue->ctrl->ctrl.device,
+		"updating key for queue %d\n", qid);
+
+	cancel_work(&queue->io_work);
+

Hmm.
You issue a 'cancel_work()', but later on you call this
function from within the workqueue context. Not a great
idea.
I'd rather calling 'update_tls_keys()' from io_work() only,
and drop the 'cancel_work()' invocation as that would be
pointless now.

quoted hunk ↗ jump to hunk

+	nvme_stop_keep_alive(&(queue->ctrl->ctrl));
+	flush_work(&(queue->ctrl->ctrl).async_event_work);
+
+	ret = nvme_tcp_start_tls(&(queue->ctrl->ctrl),
+				 queue, queue->ctrl->ctrl.tls_pskid,
+				 HANDSHAKE_KEY_UPDATE_TYPE_RECEIVED);
+
+	if (ret < 0) {
+		dev_err(queue->ctrl->ctrl.device,
+			"failed to update the keys %d\n", ret);
+		nvme_tcp_fail_request(queue->request);
+		nvme_tcp_done_send_req(queue);
+	}
+}
+
  static void nvme_tcp_io_work(struct work_struct *w)
  {
  	struct nvme_tcp_queue *queue =

@@ -1407,15 +1447,21 @@ static void nvme_tcp_io_work(struct work_struct *w)
  			mutex_unlock(&queue->send_mutex);
  			if (result > 0)
  				pending = true;
-			else if (unlikely(result < 0))
+			else if (unlikely(result < 0)) {
+				if (result == -EKEYEXPIRED)
+					update_tls_keys(queue);
  				break;
+			}
  		}
  
  		result = nvme_tcp_try_recvmsg(queue);
  		if (result > 0)
  			pending = true;
-		else if (unlikely(result < 0))
-			return;
+		else if (unlikely(result < 0)) {
+			if (result == -EKEYEXPIRED)
+				update_tls_keys(queue);
+			break;
+		}
  
  		/* did we get some space after spending time in recv? */
  		if (nvme_tcp_queue_has_pending(queue) &&
@@ -1723,6 +1769,7 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid,
  			ctrl->ctrl.tls_pskid = key_serial(tls_key);
  		key_put(tls_key);
  		queue->tls_err = 0;
+		queue->user_session_id = user_session_id;
  	}
  
  out_complete:
@@ -1752,6 +1799,7 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
  		keyring = key_serial(nctrl->opts->keyring);
  	args.ta_keyring = keyring;
  	args.ta_timeout_ms = tls_handshake_timeout * 1000;
+	args.user_session_id = queue->user_session_id;
  	queue->tls_err = -EOPNOTSUPP;
  	init_completion(&queue->tls_complete);
  	ret = tls_client_hello_psk(&args, GFP_KERNEL, keyupdate);

Cheers,
Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help