[PATCH v1 net-next 14/14] ipvlan: Support per-netns netdev unregistration.
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: 2026-07-01 21:43:55
Subsystem:
networking drivers, the rest · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
When a lower device is unregistered, its upper ipvlan devices
must also be unregistered. However, these upper devices may
reside in different netns than the lower device.
Let's use unregister_netdevice_queue_net() to support per-netns
device unregistration for ipvlan.
The new dying flag in struct ipvl_dev is used to avoid a race
where ipvlan_link_delete() is called while its lower device is
being removed in ipvlan_device_event().
If dying is true in ipvlan_link_delete(), the ipvlan device has
already been torn down but is not yet unregistered. In this case,
the unregistration will be done in __rtnl_net_unlock() of the
->dellink() caller.
Tested:
1. Create veth in ns1 and two ipvlan devices in ns2 and ns3.
# ip netns add ns1
# ip netns add ns2
# ip netns add ns3
# ip -n ns1 link add veth0 type veth peer veth1
# ip -n ns2 link add ipvl2 link veth0 link-netns ns1 type ipvlan mode l2
# ip -n ns3 link add ipvl3 link veth0 link-netns ns1 type ipvlan mode l2
2. Run bpftrace to check that veth0 is unregistered first but is
not freed until the ipvlan devices have been unregistered.
# bpftrace -e '#include <linux/netdevice.h>
kprobe:ipvlan_uninit,
kprobe:veth_dellink,
kprobe:free_netdev {
$dev = (struct net_device *)arg0;
printf("PID: %d | DEV: %s%s\n", pid, $dev->name, kstack());
}'
3. Remove the lower veth0 in ns1.
# ip -n ns1 link del veth0
We can see that veth0 is freed only after ipvl2 and ipvl3 are
unregistered in the per-netns work, because ipvl_port holds a
reference to veth0.
PID: 2010 | DEV: veth0
veth_dellink+5
rtnl_dellink+1213
rtnetlink_rcv_msg+1791
...
PID: 440 | DEV: ipvl2
ipvlan_uninit+5
unregister_netdevice_many_notify+7129
unregister_netdevice_many_net+1050
rtnl_net_work_func+136
process_scheduled_works+2538
...
PID: 440 | DEV: ipvl2
free_netdev+5
netdev_run_todo+4798
process_scheduled_works+2538
...
PID: 440 | DEV: ipvl3
ipvlan_uninit+5
unregister_netdevice_many_notify+7129
unregister_netdevice_many_net+1050
rtnl_net_work_func+136
process_scheduled_works+2538
...
PID: 2010 | DEV: veth0
free_netdev+5
netdev_run_todo+4798
rtnl_dellink+1507
rtnetlink_rcv_msg+1791
...
PID: 440 | DEV: ipvl3
free_netdev+5
netdev_run_todo+4798
process_scheduled_works+2538
...
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
---
drivers/net/ipvlan/ipvlan.h | 4 +++-
drivers/net/ipvlan/ipvlan_main.c | 25 ++++++++++++++++---------
drivers/net/ipvlan/ipvtap.c | 3 ++-
3 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index a0736f5c89f6..a83313244add 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h@@ -72,6 +72,7 @@ struct ipvl_dev { DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; u32 msg_enable; + bool dying; }; struct ipvl_addr {
@@ -216,7 +217,8 @@ struct ipvtap_dev { struct tap_dev tap; }; -void __ipvtap_dellink(struct net_device *dev, struct list_head *head); +void __ipvtap_dellink(struct net *net, struct net_device *dev, + struct list_head *head); #endif #endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 41024fe27b78..7e2cf43ca78a 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c@@ -700,7 +700,8 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params, } EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void __ipvlan_link_delete(struct net_device *dev, struct list_head *head) +static void __ipvlan_link_delete(struct net *net, struct net_device *dev, + struct list_head *head) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next;
@@ -715,7 +716,7 @@ static void __ipvlan_link_delete(struct net_device *dev, struct list_head *head) ida_free(&ipvlan->port->ida, dev->dev_id); list_del_rcu(&ipvlan->pnode); - unregister_netdevice_queue(dev, head); + unregister_netdevice_queue_net(net, dev, head); netdev_upper_dev_unlink(ipvlan->phy_dev, dev); }
@@ -724,18 +725,20 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev *ipvlan = netdev_priv(dev); mutex_lock(&ipvlan->port->pnodes_lock); - __ipvlan_link_delete(dev, head); + if (!ipvlan->dying) + __ipvlan_link_delete(dev_net(dev), dev, head); mutex_unlock(&ipvlan->port->pnodes_lock); } #if IS_ENABLED(CONFIG_IPVTAP) -void __ipvtap_dellink(struct net_device *dev, struct list_head *head) +void __ipvtap_dellink(struct net *net, struct net_device *dev, + struct list_head *head) { struct ipvtap_dev *vlantap = netdev_priv(dev); netdev_rx_handler_unregister(dev); tap_del_queues(&vlantap->tap); - __ipvlan_link_delete(dev, head); + __ipvlan_link_delete(net, dev, head); } EXPORT_SYMBOL_GPL(__ipvtap_dellink); #endif
@@ -832,22 +835,26 @@ static int ipvlan_device_event(struct notifier_block *unused, ipvlan_migrate_l3s_hook(oldnet, newnet); break; } - case NETDEV_UNREGISTER: + case NETDEV_UNREGISTER: { + struct net *net = dev_net(dev); + if (dev->reg_state != NETREG_UNREGISTERING) break; list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) { + ipvlan->dying = true; + #if IS_ENABLED(CONFIG_IPVTAP) if (ipvlan->dev->rtnl_link_ops != &ipvlan_link_ops) - __ipvtap_dellink(ipvlan->dev, &lst_kill); + __ipvtap_dellink(net, ipvlan->dev, &lst_kill); else #endif - __ipvlan_link_delete(ipvlan->dev, &lst_kill); + __ipvlan_link_delete(net, ipvlan->dev, &lst_kill); } unregister_netdevice_many(&lst_kill); break; - + } case NETDEV_FEAT_CHANGE: list_for_each_entry(ipvlan, &port->ipvlans, pnode) { netif_inherit_tso_max(ipvlan->dev, dev);
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 17b0dd7cf73b..b790959c03f5 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c@@ -110,7 +110,8 @@ static void ipvtap_dellink(struct net_device *dev, struct ipvl_port *port = vlantap->vlan.port; mutex_lock(&port->pnodes_lock); - __ipvtap_dellink(dev, head); + if (!vlantap->vlan.dying) + __ipvtap_dellink(dev_net(dev), dev, head); mutex_unlock(&port->pnodes_lock); }
--
2.55.0.rc0.799.gd6f94ed593-goog