[PATCH v1 net-next 08/14] veth: Support per-netns device unregistration.
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: 2026-07-01 21:43:51
Subsystem: networking drivers, the rest
Maintainers: Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds

Currently, veth_dellink() unregisters both local and peer devices
synchronously under RTNL.
Once RTNL is removed, veth_dellink() can be called concurrently from
different netns.
Let's use xchg() and unregister_netdevice_queue_net() to support
per-netns device unregistration.
This way, each device is queued for destruction only once by
the winner of the race.
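Note that each device now holds a reference on its peer, taken with
netdev_hold() in veth_newlink() and tracked by priv->peer_tracker.
This extra reference ensures that @peer obtained by the first xchg()
is not freed during the subsequent access to netdev_priv(peer).
The 2nd xchg() overwrites @dev with the pointer stored in the peer's
private data (NULL if another caller has already cleared it), so that
the final netdev_put() calls drop only the references this caller
actually took over, which keeps the refcount balanced.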
Tested:
1. Create two veth pairs (veth1-2, veth3-4) between two netns
(ns1 & ns2).
# ip netns add ns1
# ip netns add ns2
# ip -n ns1 link add veth1 type veth peer veth2 netns ns2
# ip -n ns1 link add veth3 type veth peer veth4 netns ns2
2. Run bpftrace to check if the same process does NOT
unregister the paired veth devices
# bpftrace -e '#include <linux/netdevice.h>
kprobe:free_netdev {
$dev = (struct net_device *)arg0;
printf("PID: %d | DEV: %s%s\n", pid, $dev->name, kstack());
}'
3. Remove veth2 in ns2 and check bpftrace output
# ip -n ns2 link del veth2
PID: 2194 | DEV: veth2
free_netdev+5
netdev_run_todo+4798
rtnl_dellink+1507
rtnetlink_rcv_msg+1791
...
PID: 448 | DEV: veth1
free_netdev+5
netdev_run_todo+4798
process_scheduled_works+2538
...
4. Remove ns2 (thus veth4) and check bpftrace output
# ip netns del ns2
PID: 571 | DEV: veth4
free_netdev+5
netdev_run_todo+4798
default_device_exit_batch+2271
ops_undo_list+993
cleanup_net+1122
process_scheduled_works+2538
...
PID: 441 | DEV: veth3
free_netdev+5
netdev_run_todo+4798
process_scheduled_works+2538
...
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
---
drivers/net/veth.c | 34 +++++++++++++++++++++-------------
1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 1c5142149175..8170bf33ccf9 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -77,6 +77,7 @@ struct veth_priv {
 	struct bpf_prog		*_xdp_prog;
 	struct veth_rq		*rq;
 	unsigned int		requested_headroom;
+	netdevice_tracker	peer_tracker;
 };
 
 struct veth_xdp_tx_bq {
@@ -1901,15 +1902,17 @@ static int veth_newlink(struct net_device *dev,
 
 	priv = netdev_priv(dev);
 	rcu_assign_pointer(priv->peer, peer);
+	netdev_hold(peer, &priv->peer_tracker, GFP_KERNEL);
 	err = veth_init_queues(dev, tb);
 	if (err)
 		goto err_queues;
 
 	priv = netdev_priv(peer);
 	rcu_assign_pointer(priv->peer, dev);
+	netdev_hold(dev, &priv->peer_tracker, GFP_KERNEL);
 	err = veth_init_queues(peer, tb);
 	if (err)
-		goto err_queues;
+		goto err_peer_queues;
 
 	veth_disable_gro(dev);
 	/* update XDP supported features */
@@ -1918,7 +1921,11 @@ static int veth_newlink(struct net_device *dev,
 
 	return 0;
 
+err_peer_queues:
+	netdev_put(dev, &priv->peer_tracker);
+	priv = netdev_priv(dev);
 err_queues:
+	netdev_put(peer, &priv->peer_tracker);
 	unregister_netdevice(dev);
 err_register_dev:
 	/* nothing to do */
@@ -1933,24 +1940,25 @@ static int veth_newlink(struct net_device *dev,
 
 static void veth_dellink(struct net_device *dev, struct list_head *head)
 {
-	struct veth_priv *priv;
+	netdevice_tracker *peer_tracker;
 	struct net_device *peer;
+	struct veth_priv *priv;
 
 	priv = netdev_priv(dev);
-	peer = rtnl_dereference(priv->peer);
+	peer_tracker = &priv->peer_tracker;
+	peer = unrcu_pointer(xchg(&priv->peer, NULL));
+	if (!peer)
+		return;
 
-	/* Note : dellink() is called from default_device_exit_batch(),
-	 * before a rcu_synchronize() point. The devices are guaranteed
-	 * not being freed before one RCU grace period.
-	 */
-	RCU_INIT_POINTER(priv->peer, NULL);
 	unregister_netdevice_queue(dev, head);
 
-	if (peer) {
-		priv = netdev_priv(peer);
-		RCU_INIT_POINTER(priv->peer, NULL);
-		unregister_netdevice_queue(peer, head);
-	}
+	priv = netdev_priv(peer);
+	dev = unrcu_pointer(xchg(&priv->peer, NULL));
+	if (dev)
+		unregister_netdevice_queue_net(dev_net(dev), peer, head);
+
+	netdev_put(peer, peer_tracker);
+	netdev_put(dev, &priv->peer_tracker);
 }
 
 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
--
2.55.0.rc0.799.gd6f94ed593-goog