[RFC net-next 11/17] mptcp: implement mptcp-specific tls protocol ops
From: Geliang Tang <geliang@kernel.org>
Date: 2026-06-22 10:45:16
Also in:
mptcp
Subsystem:
networking [general], networking [mptcp], networking [tcp], networking [tls], the rest · Maintainers:
"David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Matthieu Baerts, Mat Martineau, Neal Cardwell, John Fastabend, Sabrina Dubroca, Linus Torvalds
From: Geliang Tang <redacted> This patch implements the MPTCP-specific struct tls_prot_ops, named 'tls_mptcp_ops'. Passing an MPTCP socket to tcp_rate_check_app_limited() can trigger a crash, because that function treats the socket it is given as a struct tcp_sock. Its body is therefore factored out into tcp_sock_rate_check_app_limited(), which takes a struct tcp_sock directly, and an MPTCP version of check_app_limited() is implemented here, which calls tcp_sock_rate_check_app_limited() for each subflow. The MPTCP implementation of the lock_is_held interface checks not only sock_owned_by_user_nocheck(sk), as TCP does, but also whether the MPTCP data lock is held. This is required because TLS may call lock_is_held from softirq context with bh_lock_sock held. Checking both conditions ensures that TLS always defers to the workqueue when the MPTCP data lock is held, avoiding a deadlock. Implement mptcp_skb_get_header() to handle fragmented MPTCP skbs when copying TLS record headers. In tls_strp_read_sock(), tls_strp_load_anchor_with_queue() first attaches the skbs from TCP/MPTCP to the frag_list of strp->anchor. In TCP, this is fine because the skb data is contiguous; in MPTCP, however, each skb has its own offset, so the data is non-contiguous. As a result, when tls_rx_msg_size() subsequently runs, skb_copy_bits() may read across an skb boundary; because the offset of the second skb is ignored, the wrong data is read for MPTCP. mptcp_skb_get_header() handles this by honouring the offset of each skb, and so obtains the correct TLS header. Later on, tls_strp_check_queue_ok() handles the copy_mode scenario: when an MPTCP skb has a non-zero offset, it falls back to copy_mode, copying the valid data from each skb one by one into anchor->frag_list, which resolves the offset issue. Hence, the impact of the offset within the TLS module is completely eliminated. 
Co-developed-by: Gang Yan <redacted> Signed-off-by: Gang Yan <redacted> Co-developed-by: Zqiang <qiang.zhang@linux.dev> Signed-off-by: Zqiang <qiang.zhang@linux.dev> Signed-off-by: Geliang Tang <redacted> --- include/net/mptcp.h | 2 + include/net/tcp.h | 1 + net/ipv4/tcp.c | 9 +++- net/mptcp/protocol.c | 113 +++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 1 + net/tls/tls_main.c | 13 +++++ 6 files changed, 137 insertions(+), 2 deletions(-)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 333bde2a0b76..ba2257986b13 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h@@ -132,6 +132,8 @@ struct mptcp_pm_ops { void (*release)(struct mptcp_sock *msk); } ____cacheline_aligned_in_smp; +extern struct tls_prot_ops tls_mptcp_ops; + #ifdef CONFIG_MPTCP void mptcp_init(void);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d376ea4d1c0..ac823492d3e4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h@@ -849,6 +849,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); +void tcp_sock_rate_check_app_limited(struct tcp_sock *tp); void tcp_rate_check_app_limited(struct sock *sk); /* Read 'sendfile()'-style from a TCP socket */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b427f924608c..b875be6ae5bc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c@@ -1096,9 +1096,9 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, } /* If a gap is detected between sends, mark the socket application-limited. */ -void tcp_rate_check_app_limited(struct sock *sk) +void tcp_sock_rate_check_app_limited(struct tcp_sock *tp) { - struct tcp_sock *tp = tcp_sk(sk); + struct sock *sk = (struct sock *)tp; if (/* We have less than one packet to send. */ tp->write_seq - tp->snd_nxt < tp->mss_cache &&
@@ -1111,6 +1111,11 @@ void tcp_rate_check_app_limited(struct sock *sk) tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; } + +void tcp_rate_check_app_limited(struct sock *sk) +{ + tcp_sock_rate_check_app_limited(tcp_sk(sk)); +} EXPORT_SYMBOL_GPL(tcp_rate_check_app_limited); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 18c8b6c64c3f..f4cd7a6e5770 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c@@ -24,6 +24,7 @@ #include <net/mptcp.h> #include <net/hotdata.h> #include <net/xfrm.h> +#include <net/tls.h> #include <asm/ioctls.h> #include "protocol.h" #include "mib.h"
@@ -4894,3 +4895,115 @@ int __init mptcp_proto_v6_init(void) return err; } #endif + +static bool mptcp_lock_is_held(struct sock *sk) +{ + return sock_owned_by_user_nocheck(sk) || + mptcp_data_is_locked(sk); +} + +static void mptcp_read_done(struct sock *sk, size_t len) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb; + size_t left; + u32 offset; + + msk_owned_by_me(msk); + + if (sk->sk_state == TCP_LISTEN) + return; + + left = len; + while (left && (skb = mptcp_recv_skb(sk, &offset)) != NULL) { + int used; + + used = min_t(size_t, skb->len - offset, left); + msk->bytes_consumed += used; + MPTCP_SKB_CB(skb)->offset += used; + MPTCP_SKB_CB(skb)->map_seq += used; + left -= used; + + if (skb->len > offset + used) + break; + + mptcp_eat_recv_skb(sk, skb); + } + + mptcp_rcv_space_adjust(msk, len - left); + + /* Some bytes were consumed: let mptcp_cleanup_rbuf() account for them (presumably this is what triggers ACKs, as in the TCP counterpart - confirm). */ + if (left != len) + mptcp_cleanup_rbuf(msk, len - left); +} + +static u32 mptcp_get_skb_seq(struct sk_buff *skb) +{ + return MPTCP_SKB_CB(skb)->map_seq - MPTCP_SKB_CB(skb)->offset; +} + +static int mptcp_skb_get_header(const struct sk_buff *skb, int off, + void *buf, int len) +{ + const struct sk_buff *iter = skb_shinfo(skb)->frag_list; + int copied = 0; + int ret = 0; + + if (!iter) + return skb_copy_bits(skb, off, buf, len); + + /* Rebase off onto the first frag's unread data: subtract that skb's MPTCP_SKB_CB offset here; each skb's own offset is added back in the loop below. */ + off -= MPTCP_SKB_CB(iter)->offset; + + while (iter && copied < len) { + int skb_off = MPTCP_SKB_CB(iter)->offset; + int data_len = iter->len - skb_off; + int count; + + if (off >= data_len) { + off -= data_len; /* off lies past this skb's unread data: skip the whole skb */ + iter = iter->next; + continue; + } + + count = min((int)(data_len - off), len - copied); + ret = skb_copy_bits(iter, skb_off + off, buf + copied, count); + if (ret) + break; + copied += count; + off = 0; + iter = iter->next; + } + + if (copied < len && !ret) + ret = -EFAULT; + return ret; +} + +static void mptcp_check_app_limited(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + 
struct mptcp_subflow_context *subflow; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + slow = lock_sock_fast(ssk); + tcp_sock_rate_check_app_limited(tcp_sk(ssk)); + unlock_sock_fast(ssk, slow); + } +} + +struct tls_prot_ops tls_mptcp_ops = { + .owner = THIS_MODULE, + .protocol = IPPROTO_MPTCP, + .recv_skb = mptcp_recv_skb, + .lock_is_held = mptcp_lock_is_held, + .read_done = mptcp_read_done, + .get_skb_seq = mptcp_get_skb_seq, + .skb_get_header = mptcp_skb_get_header, + .epollin_ready = mptcp_epollin_ready, + .check_app_limited = mptcp_check_app_limited, +}; +EXPORT_SYMBOL(tls_mptcp_ops);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index da40c6f3705f..6dea626348d9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h@@ -380,6 +380,7 @@ struct mptcp_sock { #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) +#define mptcp_data_is_locked(sk) spin_is_locked(&(sk)->sk_lock.slock) #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b45890e75c9e..170ccbb9d36d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c@@ -1366,6 +1366,12 @@ static int __init tls_register(void) if (err) goto err_strp; +#ifdef CONFIG_MPTCP + err = tls_register_prot_ops(&tls_mptcp_ops); + if (err) + goto err_tcp; +#endif + err = tls_device_init(); if (err) goto err_ops;
@@ -1374,6 +1380,10 @@ static int __init tls_register(void) return 0; err_ops: +#ifdef CONFIG_MPTCP + tls_unregister_prot_ops(&tls_mptcp_ops); +err_tcp: +#endif tls_unregister_prot_ops(&tls_tcp_ops); err_strp: tls_strp_dev_exit();
@@ -1385,6 +1395,9 @@ static int __init tls_register(void) static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); +#ifdef CONFIG_MPTCP + tls_unregister_prot_ops(&tls_mptcp_ops); +#endif tls_unregister_prot_ops(&tls_tcp_ops); tls_strp_dev_exit(); tls_device_cleanup();
--
2.53.0