Re: [PATCH net-next v3 03/13] net: introduce ndo_set_rx_mode_async and dev_rx_mode_work
From: Jakub Kicinski <kuba@kernel.org>
Date: 2026-03-23 23:20:06
Also in:
intel-wired-lan, linux-doc, linux-kselftest, linux-rdma, linux-wireless, lkml
On Thu, 19 Mar 2026 18:24:51 -0700 Stanislav Fomichev wrote:
quoted hunk ↗ jump to hunk
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 35704d115312..dc83d78d3b27 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst@@ -289,6 +289,14 @@ struct net_device synchronization rules ndo_set_rx_mode: Synchronization: netif_addr_lock spinlock. Context: BHs disabled + Notes: Deprecated in favor of sleepable ndo_set_rx_mode_async. +ndo_set_rx_mode_async: + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. + Context: process (from a work queue) + Notes: Sleepable version of ndo_set_rx_mode. Receives snapshots
It's probably just my weirdness but I find creating adjectives out of random nouns by adding "-able" to be in poor taste. "sleepable" took root in certain three letter subsystems but I hope it won't in netdev. Please use your words: Notes: Async version of ndo_set_rx_mode which runs in process context. Receives snapshots f the unicast and multicast address lists.
quoted hunk ↗ jump to hunk
+ of the unicast and multicast address lists. ndo_setup_tc: ``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locksdiff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 469b7cdb3237..b05bdd67b807 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h@@ -1117,6 +1117,16 @@ struct netdev_net_notifier { * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set * IFF_UNICAST_FLT in its priv_flags. + * Cannot sleep, called with netif_addr_lock_bh held. + * Deprecated in favor of sleepable ndo_set_rx_mode_async. + * + * void (*ndo_set_rx_mode_async)(struct net_device *dev, + * struct netdev_hw_addr_list *uc, + * struct netdev_hw_addr_list *mc); + * Sleepable version of ndo_set_rx_mode. Called from a work queue + * with rtnl_lock and netdev_lock_ops(dev) held. The uc/mc parameters + * are snapshots of the address lists - iterate with + * netdev_hw_addr_list_for_each(ha, uc). * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address@@ -1437,6 +1447,9 @@ struct net_device_ops { void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); + void (*ndo_set_rx_mode_async)(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc); int (*ndo_set_mac_address)(struct net_device *dev, void *addr); int (*ndo_validate_addr)(struct net_device *dev);@@ -1903,6 +1916,7 @@ enum netdev_reg_state { * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() + * @rx_mode_work: Work queue entry for ndo_set_rx_mode_async() * @uc: unicast mac addresses * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses@@ -2293,6 +2307,7 @@ struct net_device { unsigned int promiscuity; unsigned int allmulti; bool uc_promisc; + struct work_struct rx_mode_work; #ifdef CONFIG_LOCKDEP unsigned char nested_level; #endif@@ -4661,6 +4676,11 @@ static inline bool netif_device_present(const struct net_device *dev) return test_bit(__LINK_STATE_PRESENT, &dev->state); } +static inline bool netif_up_and_present(const struct net_device *dev) +{ + return (dev->flags & IFF_UP) && netif_device_present(dev);
Is this really worth a dedicated helper? What are you trying to express here semantically?
quoted hunk ↗ jump to hunk
+ void netif_device_detach(struct net_device *dev); void netif_device_attach(struct net_device *dev);diff --git a/net/core/dev.c b/net/core/dev.c index 200d44883fc1..fedc423306fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c@@ -2381,6 +2381,8 @@ static void netstamp_clear(struct work_struct *work) static DECLARE_WORK(netstamp_work, netstamp_clear); #endif +static struct workqueue_struct *rx_mode_wq; + void net_enable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL@@ -9669,22 +9671,84 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify) return 0; } -/* - * Upload unicast and multicast address lists to device and - * configure RX filtering. When the device doesn't support unicast - * filtering it is put in promiscuous mode while unicast addresses - * are present. +static void dev_rx_mode_work(struct work_struct *work) +{ + struct net_device *dev = container_of(work, struct net_device, + rx_mode_work); + struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref; + const struct net_device_ops *ops = dev->netdev_ops; + int err; + + __hw_addr_init(&uc_snap); + __hw_addr_init(&mc_snap); + __hw_addr_init(&uc_ref); + __hw_addr_init(&mc_ref); + + rtnl_lock(); + netdev_lock_ops(dev); + + if (!netif_up_and_present(dev)) + goto out; + + if (ops->ndo_set_rx_mode_async) {
How did we get here if we don't have this op? Are you planning to plumb more code thru this work in the future? If yes the whole rx_mode handling should be in a dedicated helper rather than indenting most of the function.
+ netif_addr_lock_bh(dev); + + err = __hw_addr_list_snapshot(&uc_snap, &dev->uc, + dev->addr_len); + if (!err) + err = __hw_addr_list_snapshot(&uc_ref, &dev->uc, + dev->addr_len); + if (!err) + err = __hw_addr_list_snapshot(&mc_snap, &dev->mc, + dev->addr_len); + if (!err) + err = __hw_addr_list_snapshot(&mc_ref, &dev->mc, + dev->addr_len);
This doesn't get slow with a few thousands of addresses?
quoted hunk ↗ jump to hunk
+ netif_addr_unlock_bh(dev); + + if (err) { + netdev_WARN(dev, "failed to sync uc/mc addresses\n"); + __hw_addr_flush(&uc_snap); + __hw_addr_flush(&uc_ref); + __hw_addr_flush(&mc_snap); + goto out; + } + + ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap); + + netif_addr_lock_bh(dev); + __hw_addr_list_reconcile(&dev->uc, &uc_snap, + &uc_ref, dev->addr_len); + __hw_addr_list_reconcile(&dev->mc, &mc_snap, + &mc_ref, dev->addr_len); + netif_addr_unlock_bh(dev); + } + +out: + netdev_unlock_ops(dev); + rtnl_unlock(); +} + +/** + * __dev_set_rx_mode() - upload unicast and multicast address lists to device + * and configure RX filtering. + * @dev: device + * + * When the device doesn't support unicast filtering it is put in promiscuous + * mode while unicast addresses are present. */ void __dev_set_rx_mode(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; /* dev_open will call this function so the list will stay sane. */ - if (!(dev->flags&IFF_UP)) + if (!netif_up_and_present(dev)) return; - if (!netif_device_present(dev)) + if (ops->ndo_set_rx_mode_async) { + queue_work(rx_mode_wq, &dev->rx_mode_work); return; + } if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl,@@ -11708,6 +11772,16 @@ void netdev_run_todo(void) __rtnl_unlock(); + /* Make sure all pending rx_mode work completes before returning. + * + * rx_mode_wq may be NULL during early boot: + * core_initcall(netlink_proto_init) vs subsys_initcall(net_dev_init). + * + * Check current_work() to avoid flushing from the wq. + */ + if (rx_mode_wq && !current_work()) + flush_workqueue(rx_mode_wq);
Can we give the work a reference on the netdev (at init time) and cancel + release it here instead of flushing / waiting?
quoted hunk ↗ jump to hunk
/* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier();@@ -12099,6 +12173,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif mutex_init(&dev->lock); + INIT_WORK(&dev->rx_mode_work, dev_rx_mode_work); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev);@@ -12203,6 +12278,8 @@ void free_netdev(struct net_device *dev) kfree(rcu_dereference_protected(dev->ingress_queue, 1)); + cancel_work_sync(&dev->rx_mode_work);
Should never happen so maybe wrap it in a WARN ?
quoted hunk ↗ jump to hunk
/* Flush device addresses */ dev_addr_flush(dev);@@ -13296,6 +13373,10 @@ static int __init net_dev_init(void) if (register_pernet_device(&default_device_ops)) goto out; + rx_mode_wq = alloc_ordered_workqueue("rx_mode_wq", 0); + if (!rx_mode_wq) + goto out; + open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action);