[PATCH nf-next] ipvs: add conn_max sysctl to limit connections
From: Julian Anastasov <ja@ssi.bg>
Date: 2026-05-22 10:56:20
Also in:
lvs-devel
Subsystem:
documentation, ipvs, netfilter, networking [general], networking [ipv4/ipv6], the rest · Maintainers:
Jonathan Corbet, Simon Horman, Julian Anastasov, Pablo Neira Ayuso, Florian Westphal, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, David Ahern, Ido Schimmel, Linus Torvalds
Currently, we are using atomic_t to track the number of connections. On 64-bit setups with large memory there is a risk of this counter overflowing. Also, setups with many containers may need to tune the limit for connections. Add a sysctl control to limit the number of connections, with a maximum of 1,073,741,824 (64-bit) or 16,777,216 (32-bit). Depending on the admin's privilege, a written value changes both the soft and hard limits (privileged) or only the soft limit (unprivileged), allowing unprivileged admins to change the soft limit within the range determined by privileged admins. Signed-off-by: Julian Anastasov <ja@ssi.bg> --- Documentation/networking/ipvs-sysctl.rst | 35 ++++++++++++++++++ include/net/ip_vs.h | 22 +++++++++++ net/netfilter/ipvs/ip_vs_conn.c | 10 ++++- net/netfilter/ipvs/ip_vs_ctl.c | 47 ++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index a556439f8be7..b6bac2612420 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst@@ -56,6 +56,41 @@ conn_lfactor - INTEGER -4: grow if load goes above 6% (buckets = nodes * 16) 2: grow if load goes above 400% (buckets = nodes / 4) +conn_max - INTEGER + Limit for the number of connections, per netns. + + Controls the soft and hard limits for the number of connections. + Initially, the platform-specific limit is assigned for init_net. + The value can be changed, and the soft limit is then inherited + by networking namespaces created later. + + A privileged admin can change both limits up to the value of the + platform limit, while an unprivileged admin can change only the + soft limit, up to the value of the hard limit. + + For setups using conntrack=1 (CONFIG_IP_VS_NFCT for + Netfilter connection tracking) the connections can + also be limited by nf_conntrack_max. + + soft limit hard limit + ===================================================== + init_net: + create netns platform platform + priv admin 0 .. platform 0 .. platform + ===================================================== + new netns: + create netns init_net:soft init_net:soft + priv admin 0 .. platform 0 .. platform + unpriv admin 0 .. hard N/A + + Limits per platform: + 1,073,741,824 (2^30 for 64-bit) + 16,777,216 (2^24 for 32-bit) + + Possible values: 0 .. platform limit + + Default: platform limit + conn_reuse_mode - INTEGER 1 - default
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a02e569813d2..5b3d1c681231 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h@@ -44,6 +44,14 @@ #define IP_VS_CONN_TAB_MAX_BITS 20 #endif +/* conn_max limits */ +#if BITS_PER_LONG > 32 +/* Limit of atomic_t but restricted by roundup_pow_of_two() in ip_vs_core.c */ +#define IP_VS_CONN_MAX (1 << 30) +#else +#define IP_VS_CONN_MAX (1 << 24) +#endif + /* svc_table limits */ #define IP_VS_SVC_TAB_MIN_BITS 4 #define IP_VS_SVC_TAB_MAX_BITS 20
@@ -1220,6 +1228,10 @@ struct netns_ipvs { /* sysctl variables */ int sysctl_amemthresh; int sysctl_am_droprate; +#ifdef CONFIG_SYSCTL + int sysctl_conn_max;/* soft limit for conns */ + int conn_max_limit; /* hard limit for conn_max */ +#endif int sysctl_drop_entry; int sysctl_drop_packet; int sysctl_secure_tcp;
@@ -1317,6 +1329,11 @@ struct netns_ipvs { #ifdef CONFIG_SYSCTL +static inline int sysctl_conn_max(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_conn_max); +} + static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) { return ipvs->sysctl_sync_threshold[0];
@@ -1436,6 +1453,11 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs) #else +static inline int sysctl_conn_max(struct netns_ipvs *ipvs) +{ + return IP_VS_CONN_MAX; +} + static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) { return DEFAULT_SYNC_THRESHOLD;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9ea6b4fa78bf..e76a73d183d5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c@@ -1358,9 +1358,18 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, struct netns_ipvs *ipvs = p->ipvs; struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs, p->protocol); + /* Increment conn_count up to conn_max */ + int count = atomic_read(&ipvs->conn_count); + int max = sysctl_conn_max(ipvs); + + do { + if (count >= max) + return NULL; + } while (!atomic_try_cmpxchg(&ipvs->conn_count, &count, count + 1)); cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { + atomic_dec(&ipvs->conn_count); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; }
@@ -1414,7 +1423,6 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, cp->in_seq.delta = 0; cp->out_seq.delta = 0; - atomic_inc(&ipvs->conn_count); if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) { int af_id = ip_vs_af_index(cp->af);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd9cae44d214..bd9d494b208a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c@@ -2319,6 +2319,39 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) #ifdef CONFIG_SYSCTL +static int +proc_do_conn_max(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = table->data; + int val = *valp; + int rc; + + const struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (write && (*valp != val)) { + struct netns_ipvs *ipvs = table->extra2; + bool priv = capable(CAP_NET_ADMIN); + /* Unprivileged admins can not go above the hard limit */ + int max = priv ? IP_VS_CONN_MAX : ipvs->conn_max_limit; + + if (val < 0 || val > max) { + rc = -EINVAL; + } else { + /* Privileged admin changes both limits */ + if (priv) + ipvs->conn_max_limit = val; + WRITE_ONCE(*valp, val); + } + } + return rc; +} + static int proc_do_defense_mode(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos)
@@ -2623,6 +2656,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "conn_max", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_do_conn_max, + }, { .procname = "drop_entry", .maxlen = sizeof(int),
@@ -4977,6 +5016,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_amemthresh; ipvs->sysctl_am_droprate = 10; tbl[idx++].data = &ipvs->sysctl_am_droprate; + + /* Inherit both limits from init_net:conn_max */ + ipvs->conn_max_limit = net_eq(net, &init_net) ? IP_VS_CONN_MAX : + READ_ONCE(*(int *)vs_vars[idx].data); + ipvs->sysctl_conn_max = ipvs->conn_max_limit; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_conn_max; + tbl[idx++].data = &ipvs->sysctl_drop_entry; tbl[idx++].data = &ipvs->sysctl_drop_packet; #ifdef CONFIG_IP_VS_NFCT
--
2.54.0