Thread (13 messages) 13 messages, 1 author, 16h ago
HOTtoday

[PATCH v1 net-next 09/10] net: fib_rules: Only hold RTNL for the first IPv4 RTM_NEWRULE.

From: Kuniyuki Iwashima <kuniyu@google.com>
Date: 2026-06-29 18:12:37
Subsystem: networking [general], networking [ipv4/ipv6], the rest · Maintainers: "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, David Ahern, Ido Schimmel, Linus Torvalds

Now, RTM_DELRULE no longer needs RTNL, and the only RTNL dependency
in RTM_NEWRULE is fib_unmerge(), which is called for the first
IPv4 rule.

Let's add fib_rules_ops.need_rtnl() and hold RTNL only for the
first IPv4 rule.

Tested:
The script below creates 1K rules in each of 4K netns, running the
netns in parallel, and rule creation got 20x/30x faster for IPv4/IPv6.

  #!/bin/bash
  N=4096
  F=rules.txt

  for i in $(seq $N); do ip netns add ns-$i; done
  printf 'rule add from all table %d\n' {1..1024} > $F

  for v in 4 6; do
  	echo "=== IPv${v} ==="
  	time { for i in $(seq $N); do nsenter \
  	--net=/var/run/netns/ns-$i ip -$v -batch $F & done; wait; }
  done

  for i in $(seq $N); do ip netns del ns-$i; done
  rm -f $F

Without this series:

  # ./test.sh
  === IPv4 ===

  real	0m22.752s
  user	0m7.834s
  sys	92m46.721s
  === IPv6 ===

  real	0m35.181s
  user	0m8.635s
  sys	142m30.479s

With this series:

  # ./test.sh
  === IPv4 ===

  real	0m0.918s
  user	0m5.675s
  sys	2m7.024s
  === IPv6 ===

  real	0m1.214s
  user	0m7.917s
  sys	4m19.489s

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
---
 include/net/fib_rules.h |  1 +
 net/core/fib_rules.c    | 15 ++++++---------
 net/ipv4/fib_rules.c    |  6 ++++++
 3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 7636ef4da5ad..c6b94790fa81 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -93,6 +93,7 @@ struct fib_rules_ops {
 	/* Called after modifications to the rules set, must flush
 	 * the route cache if one exists. */
 	void			(*flush_cache)(struct fib_rules_ops *ops);
+	bool			(*need_rtnl)(struct net *net);
 
 	int			nlgroup;
 	struct list_head	rules_list;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 2b652dd83241..22e5e5e1a9c4 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -881,6 +881,7 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct nlattr *tb[FRA_MAX + 1];
 	bool user_priority = false;
 	struct fib_rule_hdr *frh;
+	bool unlock_rtnl = false;
 
 	frh = nlmsg_payload(nlh, sizeof(*frh));
 	if (!frh) {
@@ -906,8 +907,10 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		goto errout;
 
-	if (!rtnl_held)
+	if (!rtnl_held && ops->need_rtnl && ops->need_rtnl(net)) {
+		unlock_rtnl = true;
 		rtnl_net_lock(net);
+	}
 	mutex_lock(&ops->lock);
 
 	err = fib_nl2rule_locked(rule, ops, tb, extack);
@@ -978,7 +981,7 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 	fib_rule_get(rule);
 
 	mutex_unlock(&ops->lock);
-	if (!rtnl_held)
+	if (unlock_rtnl)
 		rtnl_net_unlock(net);
 
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
@@ -989,7 +992,7 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 
 errout_free:
 	mutex_unlock(&ops->lock);
-	if (!rtnl_held)
+	if (unlock_rtnl)
 		rtnl_net_unlock(net);
 	kfree(rule);
 errout:
@@ -1038,8 +1041,6 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		goto errout;
 
-	if (!rtnl_held)
-		rtnl_net_lock(net);
 	mutex_lock(&ops->lock);
 
 	err = fib_nl2rule_locked(nlrule, ops, tb, extack);
@@ -1096,8 +1097,6 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL);
 
 	mutex_unlock(&ops->lock);
-	if (!rtnl_held)
-		rtnl_net_unlock(net);
 
 	notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
 	fib_rule_put(rule);
@@ -1108,8 +1107,6 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
 
 errout_free:
 	mutex_unlock(&ops->lock);
-	if (!rtnl_held)
-		rtnl_net_unlock(net);
 	kfree(nlrule);
 errout:
 	rules_ops_put(ops);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 16d202246a36..4edb0dca7be8 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -460,6 +460,11 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 	rt_cache_flush(ops->fro_net);
 }
 
+static bool fib4_rule_need_rtnl(struct net *net)
+{
+	return !net->ipv4.fib_has_custom_rules;
+}
+
 static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
 	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
@@ -473,6 +478,7 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
 	.fill		= fib4_rule_fill,
 	.nlmsg_payload	= fib4_rule_nlmsg_payload,
 	.flush_cache	= fib4_rule_flush_cache,
+	.need_rtnl	= fib4_rule_need_rtnl,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.owner		= THIS_MODULE,
 };
-- 
2.55.0.rc0.799.gd6f94ed593-goog
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help