Re: [PATCH v3 7/8] nvmet-tcp: Support KeyUpdate

From: Hannes Reinecke <hare@suse.de>
Date: 2025-10-06 06:48:48
Also in: linux-doc, linux-nfs, linux-nvme, lkml

On 10/3/25 06:31, alistair23@gmail.com wrote:

quoted hunk ↗ jump to hunk

From: Alistair Francis <redacted>

If the nvmet_tcp_try_recv() function return EKEYEXPIRED or if we receive
a KeyUpdate handshake type then the underlying TLS keys need to be
updated.

If the NVMe Host (TLS client) initiates a KeyUpdate this patch will
allow the NVMe layer to process the KeyUpdate request and forward the
request to userspace. Userspace must then update the key to keep the
connection alive.

This patch allows us to handle the NVMe host sending a KeyUpdate
request without aborting the connection. At this time we don't support
initiating a KeyUpdate.

Link: https://datatracker.ietf.org/doc/html/rfc8446#section-4.6.3
Signed-off-by: Alistair Francis <redacted>
---
v3:
  - Use a write lock for sk_user_data
  - Fix build with CONFIG_NVME_TARGET_TCP_TLS disabled
  - Remove unused variable
v2:
  - Use a helper function for KeyUpdates
  - Ensure keep alive timer is stopped
  - Wait for TLS KeyUpdate to complete

  drivers/nvme/target/tcp.c | 90 ++++++++++++++++++++++++++++++++++++---
  1 file changed, 85 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index bee0355195f5..fd59dd3ca632 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c

@@ -175,6 +175,7 @@ struct nvmet_tcp_queue {
  
  	/* TLS state */
  	key_serial_t		tls_pskid;
+	key_serial_t		user_session_id;
  	struct delayed_work	tls_handshake_tmo_work;
  
  	unsigned long           poll_end;

@@ -186,6 +187,8 @@ struct nvmet_tcp_queue {
  	struct sockaddr_storage	sockaddr_peer;
  	struct work_struct	release_work;
  
+	struct completion       tls_complete;
+
  	int			idx;
  	struct list_head	queue_list;

@@ -836,6 +839,11 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
  	return 1;
  }
  
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue);
+static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w);
+#endif
+
  static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
  		int budget, int *sends)
  {

And we need this why?

quoted hunk ↗ jump to hunk

@@ -844,6 +852,13 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
  	for (i = 0; i < budget; i++) {
  		ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
  		if (unlikely(ret < 0)) {
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+			if (ret == -EKEYEXPIRED &&
+				queue->state != NVMET_TCP_Q_DISCONNECTING &&
+				queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
+					goto done;
+			}
+#endif
  			nvmet_tcp_socket_error(queue, ret);
  			goto done;
  		} else if (ret == 0) {

See my comment to the host patches. Handling an incoming KeyUpdate is
vastly different than initiating a KeyUpdate. _and_ the network stack
will only ever return -EKEYEXPIRED on receive.
So please split the patches in handling an incoming KeyUpdate and
initiating a KeyUpdate.

quoted hunk ↗ jump to hunk

@@ -1110,6 +1125,45 @@ static inline bool nvmet_tcp_pdu_valid(u8 type)
  	return false;
  }
  
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+static int update_tls_keys(struct nvmet_tcp_queue *queue)
+{
+	int ret;
+
+	cancel_work(&queue->io_work);
+	queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
+
+	/* Restore the default callbacks before starting upcall */
+	write_lock_bh(&queue->sock->sk->sk_callback_lock);
+	queue->sock->sk->sk_data_ready =  queue->data_ready;
+	queue->sock->sk->sk_state_change = queue->state_change;
+	queue->sock->sk->sk_write_space = queue->write_space;
+	queue->sock->sk->sk_user_data = NULL;
+	write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+

We do have a function for this ...

quoted hunk ↗ jump to hunk

+	nvmet_stop_keep_alive_timer(queue->nvme_sq.ctrl);
+
+	INIT_DELAYED_WORK(&queue->tls_handshake_tmo_work,
+			  nvmet_tcp_tls_handshake_timeout);
+
+	ret = nvmet_tcp_tls_handshake(queue, HANDSHAKE_KEY_UPDATE_TYPE_RECEIVED);
+
+	if (ret < 0)
+		return ret;
+
+	ret = wait_for_completion_interruptible_timeout(&queue->tls_complete, 10 * HZ);
+
+	if (ret <= 0) {
+		tls_handshake_cancel(queue->sock->sk);
+		return ret;
+	}
+
+	queue->state = NVMET_TCP_Q_LIVE;
+
+	return ret;
+}
+#endif
+
  static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue,
  		struct msghdr *msg, char *cbuf)
  {

@@ -1135,6 +1189,9 @@ static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue,
  			ret = -EAGAIN;
  		}
  		break;
+	case TLS_RECORD_TYPE_HANDSHAKE:
+		ret = -EAGAIN;
+		break;

Shouldn't this be rather -EKEYEXPIRED?

quoted hunk ↗ jump to hunk

  	default:
  		/* discard this record type */
  		pr_err("queue %d: TLS record %d unhandled\n",

@@ -1344,6 +1401,13 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
  	for (i = 0; i < budget; i++) {
  		ret = nvmet_tcp_try_recv_one(queue);
  		if (unlikely(ret < 0)) {
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+			if (ret == -EKEYEXPIRED &&
+				queue->state != NVMET_TCP_Q_DISCONNECTING &&
+				queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
+					goto done;
+			}
+#endif
  			nvmet_tcp_socket_error(queue, ret);
  			goto done;
  		} else if (ret == 0) {
@@ -1408,14 +1472,26 @@ static void nvmet_tcp_io_work(struct work_struct *w)
  		ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
  		if (ret > 0)
  			pending = true;
-		else if (ret < 0)
-			return;
+		else if (ret < 0) {
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+			if (ret == -EKEYEXPIRED)
+				update_tls_keys(queue);
+			else
+#endif
+				return;
+		}
  
  		ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
  		if (ret > 0)
  			pending = true;
-		else if (ret < 0)
-			return;
+		else if (ret < 0) {
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+			if (ret == -EKEYEXPIRED)
+				update_tls_keys(queue);
+			else
+#endif
+				return;
+		}
  
  	} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
  

Wouldn't it be better to move the call to 'update_tls_keys()' out of
the loop and just requeue io_work afterwards?

quoted hunk ↗ jump to hunk

@@ -1798,6 +1874,7 @@ static void nvmet_tcp_tls_handshake_done(void *data, int status,
  	}
  	if (!status) {
  		queue->tls_pskid = peerid;
+		queue->user_session_id = user_session_id;
  		queue->state = NVMET_TCP_Q_CONNECTING;
  	} else
  		queue->state = NVMET_TCP_Q_FAILED;
@@ -1813,6 +1890,7 @@ static void nvmet_tcp_tls_handshake_done(void *data, int status,
  	else
  		nvmet_tcp_set_queue_sock(queue);
  	kref_put(&queue->kref, nvmet_tcp_release_queue);
+	complete(&queue->tls_complete);
  }
  
  static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w)
@@ -1843,7 +1921,7 @@ static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue,
  	int ret = -EOPNOTSUPP;
  	struct tls_handshake_args args;
  
-	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
+	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE && !keyupdate) {
  		pr_warn("cannot start TLS in state %d\n", queue->state);
  		return -EINVAL;

Why? Shouldn't we always set the HANDSHAKE state?

quoted hunk ↗ jump to hunk

@@ -1856,7 +1934,9 @@ static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue,
  	args.ta_data = queue;
  	args.ta_keyring = key_serial(queue->port->nport->keyring);
  	args.ta_timeout_ms = tls_handshake_timeout * 1000;
+	args.user_session_id = queue->user_session_id;
  
+	init_completion(&queue->tls_complete);
  	ret = tls_server_hello_psk(&args, GFP_KERNEL, keyupdate);
  	if (ret) {
  		kref_put(&queue->kref, nvmet_tcp_release_queue)

Cheers,
Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help