[PATCH v4 net-next 3/5] rtnetlink: do not acquire RTNL in rtnl_getlink() with RTEXT_FILTER_NAME_ONLY
From: Eric Dumazet <edumazet@google.com>
Date: 2026-05-22 17:30:10
Subsystem: networking [general], the rest
Maintainers: "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
When RTEXT_FILTER_NAME_ONLY is requested, rtnl_fill_ifinfo() only dumps
device attributes that do not need RTNL protection.

Many shell scripts invoke iproute2 commands specifying a device by its
name. After this patch, they will no longer add RTNL pressure.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/rtnetlink.c | 94 +++++++++++++++++++++++++++++++-------------
 1 file changed, 67 insertions(+), 27 deletions(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3dfa28927c7f92f906a0d89b7a1812b975d13854..c342b22528e4478a61f22e204a3934ba1a48cb3c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c@@ -2068,7 +2068,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct nlmsghdr *nlh; struct Qdisc *qdisc; - ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE;
@@ -2091,6 +2090,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (ext_filter_mask & RTEXT_FILTER_NAME_ONLY) goto end; + ASSERT_RTNL(); if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure;
@@ -3468,6 +3468,21 @@ static struct net_device *rtnl_dev_get(struct net *net, return __dev_get_by_name(net, ifname); } +static struct net_device *rtnl_dev_get_rcu(struct net *net, + struct nlattr *tb[]) +{ + char ifname[ALTIFNAMSIZ]; + + if (tb[IFLA_IFNAME]) + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else if (tb[IFLA_ALT_IFNAME]) + nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ); + else + return NULL; + + return dev_get_by_name_rcu(net, ifname); +} + static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) {
@@ -4187,14 +4202,16 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFLA_MAX + 1]; + netdevice_tracker dev_tracker; + struct net_device *dev = NULL; struct net *tgt_net = net; + u32 ext_filter_mask = 0; struct ifinfomsg *ifm; - struct nlattr *tb[IFLA_MAX+1]; - struct net_device *dev = NULL; struct sk_buff *nskb; int netnsid = -1; + bool need_rtnl; int err; - u32 ext_filter_mask = 0; err = rtnl_valid_getlink_req(skb, nlh, tb, extack); if (err < 0)
@@ -4214,43 +4231,65 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); - err = -EINVAL; ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = __dev_get_by_index(tgt_net, ifm->ifi_index); - else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(tgt_net, tb); - else + rcu_read_lock(); + if (ifm->ifi_index > 0) { + dev = dev_get_by_index_rcu(tgt_net, ifm->ifi_index); + } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { + dev = rtnl_dev_get_rcu(tgt_net, tb); + } else { + rcu_read_unlock(); + err = -EINVAL; goto out; + } + netdev_hold(dev, &dev_tracker, GFP_ATOMIC); + rcu_read_unlock(); err = -ENODEV; if (dev == NULL) goto out; + need_rtnl = !(ext_filter_mask & RTEXT_FILTER_NAME_ONLY); + +retry: + if (need_rtnl) { + rtnl_lock(); + /* Synchronize the carrier state so we don't report a state + * that we're not actually going to honour immediately; if + * the driver just did a carrier off->on transition, we can + * only TX if link watch work has run, but without this we'd + * already report carrier on, even if it doesn't work yet. + */ + linkwatch_sync_dev(dev); + } + err = -ENOBUFS; nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask)); - if (nskb == NULL) - goto out; + if (nskb) + err = rtnl_fill_ifinfo(nskb, dev, net, + RTM_NEWLINK, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, 0, 0, ext_filter_mask, + 0, NULL, 0, netnsid, GFP_KERNEL); - /* Synchronize the carrier state so we don't report a state - * that we're not actually going to honour immediately; if - * the driver just did a carrier off->on transition, we can - * only TX if link watch work has run, but without this we'd - * already report carrier on, even if it doesn't work yet. 
- */ - linkwatch_sync_dev(dev); + if (need_rtnl) + rtnl_unlock(); - err = rtnl_fill_ifinfo(nskb, dev, net, - RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, - 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) { - /* -EMSGSIZE implies BUG in if_nlmsg_size */ - WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); - } else + if (err == -EMSGSIZE) { + if (!need_rtnl) { + /* Some altnames were added, retry with RTNL. */ + need_rtnl = true; + goto retry; + } + /* -EMSGSIZE implies BUG in if_nlmsg_size */ + WARN_ON_ONCE(1); + } + } else { err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); + } out: + netdev_put(dev, &dev_tracker); if (netnsid >= 0) put_net(tgt_net);
@@ -7117,7 +7156,8 @@ static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = {.msgtype = RTM_DELLINK, .doit = rtnl_dellink, .flags = RTNL_FLAG_DOIT_PERNET_WIP}, {.msgtype = RTM_GETLINK, .doit = rtnl_getlink, - .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + .dumpit = rtnl_dump_ifinfo, + .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE | RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_SETLINK, .doit = rtnl_setlink, .flags = RTNL_FLAG_DOIT_PERNET_WIP}, {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all},
--
2.54.0.746.g67dd491aae-goog