Re: netconsole: HARDIRQ-safe -> HARDIRQ-unsafe lock order warning
From: Mike Galbraith <hidden>
Date: 2025-08-21 03:37:59
Also in:
lkml
On Wed, 2025-08-20 at 10:36 -0700, Breno Leitao wrote:
On Wed, Aug 20, 2025 at 02:31:02PM +0200, Mike Galbraith wrote:quoted
On Tue, 2025-08-19 at 10:27 -0700, Breno Leitao wrote:quoted
I’ve continued investigating possible solutions, and it looks like moving netconsole over to the non‑blocking console (nbcon) framework might be the right approach. Unlike the classic console path, nbcon doesn’t rely on the global console lock, which was one of the main concerns regarding the possible deadlock.ATM at least, classic can remotely log a crash whereas nbcon can't per test drive, so it would be nice for classic to stick around until nbcon learns some atomic packet blasting.Oh, does it mean that during crash nbcon invokes `write_atomic` call back, and because this patch doesn't implement it, it will not send those pkts? Am I reading it correct?
No, I'm just saying that the kernel's last gasp doesn't make it out of the box with CONFIG_NETCONSOLE_NBCON=y as your patch sits. It doesn't with my wireless or RT lockdep et al appeasement hackery either, but I don't care deeply, as long as I can capture kernel spew inspired by LTP and whatnot, I'm happy.. kdump logs most death rattles.
quoted
quoted
The new path is protected by NETCONSOLE_NBCON, which is disabled by default. This allows us to experiment and test both approaches.As patch sits, interrupts being disabled is still a problem, gripes below.You mean that the IRQs are disabled at the acquire of target_list_lock? If so, an option is to turn that list an RCU list ?!
Yeah. I switched to local_bh_disable(), but was originally using rcu_read_lock_bh() for RT.
quoted
Not disabling IRQs makes nbcon gripe free, but creates the issue of netpoll_tx_running() lying to the rest of NETPOLL consumers. RT and the wireless stack have in common that IRQs being disabled in netpoll.c sucks rocks for them. I've been carrying a hack to allow RT to use netconsole since 5.15
(hm, grepping my modest forest I see it's actually 4.10.. hohum)
What is this patch you have?
Make boxen stop bitching at NETCONSOLE_NBCON version below.
netpoll: Make it RT friendly
PREEMPT_RT cannot alloc/free memory when not preemptible, making
disabling of IRQs across transmission an issue for RT.
Use local_bh_disable() to provide local exclusion for RT (via
softirq_ctrl.lock) for normal and fallback transmission paths
instead of disabling IRQs. Since zap_completion_queue() callers
ensure pointer stability, replace get_cpu_var() therein with
this_cpu_ptr() to keep it preemptible across kfree().
Disable a couple warnings for RT, and we're done.
v0.kinda-works -> v1:
remove get_cpu_var() from zap_completion_queue().
fix/test netpoll_tx_running() to work for RT/!RT.
fix/test xmit fallback path for RT.
(blessed by nobody, plz keep all shrapnel etc etc:)
Signed-off-by: Mike Galbraith <redacted>
---
drivers/net/netconsole.c | 4 ++--
include/linux/netpoll.h | 4 +++-
net/core/netpoll.c | 47 ++++++++++++++++++++++++++++++++++++--
---------
3 files changed, 41 insertions(+), 14 deletions(-)
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c@@ -1952,12 +1952,12 @@ static void netcon_write_thread(struct c static void netconsole_device_lock(struct console *con, unsigned long
*flags)
{
/* protects all the targets at the same time */
- spin_lock_irqsave(&target_list_lock, *flags);
+ spin_lock(&target_list_lock);
}
static void netconsole_device_unlock(struct console *con, unsigned
long flags)
{
- spin_unlock_irqrestore(&target_list_lock, flags);
+ spin_unlock(&target_list_lock);
}
#endif
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h@@ -100,9 +100,11 @@ static inline void netpoll_poll_unlock(v smp_store_release(&napi->poll_owner, -1); } +DECLARE_PER_CPU(int, _netpoll_tx_running); + static inline bool netpoll_tx_running(struct net_device *dev) { - return irqs_disabled(); + return this_cpu_read(_netpoll_tx_running); } #else --- a/net/core/netpoll.c +++ b/net/core/netpoll.c
@@ -58,6 +58,29 @@ static void zap_completion_queue(void); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); +DEFINE_PER_CPU(int, _netpoll_tx_running); +EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running); + +#define
netpoll_tx_begin(flags) \
+ do { \
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || \
+ IS_ENABLED(CONFIG_NETCONSOLE_NBCON)) \
+ local_bh_disable(); \
+ else \
+ local_irq_save(flags); \
+ this_cpu_write(_netpoll_tx_running,
1); \
+ } while (0)
+
+#define netpoll_tx_end(flags) \
+ do { \
+ this_cpu_write(_netpoll_tx_running,
0); \
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || \
+ IS_ENABLED(CONFIG_NETCONSOLE_NBCON)) \
+ local_bh_enable(); \
+ else \
+ local_irq_restore(flags); \
+ } while (0)
+
static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
struct net_device *dev,
struct netdev_queue *txq)@@ -90,7 +113,7 @@ static void queue_process(struct work_st struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; - unsigned long flags; + unsigned long __maybe_unused flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev;
@@ -102,7 +125,7 @@ static void queue_process(struct work_st continue; } - local_irq_save(flags); + netpoll_tx_begin(flags); /* check if skb->queue_mapping is still valid */ q_index = skb_get_queue_mapping(skb); if (unlikely(q_index >= dev->real_num_tx_queues)) {
@@ -115,13 +138,13 @@ static void queue_process(struct work_st !dev_xmit_complete(netpoll_start_xmit(skb, dev,
txq))) {
skb_queue_head(&npinfo->txq, skb);
HARD_TX_UNLOCK(dev, txq);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
schedule_delayed_work(&npinfo->tx_work,
HZ/10);
return;
}
HARD_TX_UNLOCK(dev, txq);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
}
}
@@ -246,7 +269,7 @@ static void refill_skbs(struct netpoll * static void zap_completion_queue(void) { unsigned long flags; - struct softnet_data *sd = &get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); if (sd->completion_queue) { struct sk_buff *clist;
@@ -267,8 +290,6 @@ static void zap_completion_queue(void) } } } - - put_cpu_var(softnet_data); } static struct sk_buff *find_skb(struct netpoll *np, int len, int
reserve)
@@ -319,7 +340,9 @@ static netdev_tx_t __netpoll_send_skb(st /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; +#if (!defined(CONFIG_PREEMPT_RT) && !defined(CONFIG_NETCONSOLE_NBCON)) lockdep_assert_irqs_disabled(); +#endif dev = np->dev; rcu_read_lock();
@@ -356,9 +379,11 @@ static netdev_tx_t __netpoll_send_skb(st udelay(USEC_PER_POLL); } +#if (!defined(CONFIG_PREEMPT_RT) && !defined(CONFIG_NETCONSOLE_NBCON)) WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled
interrupts in poll (%pS)\n", dev->name, dev->netdev_ops->ndo_start_xmit); +#endif }
@@ -399,16 +424,16 @@ static void netpoll_udp_checksum(struct netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { - unsigned long flags; + unsigned long __maybe_unused flags; netdev_tx_t ret; if (unlikely(!np)) { dev_kfree_skb_irq(skb); ret = NET_XMIT_DROP; } else { - local_irq_save(flags); + netpoll_tx_begin(flags); ret = __netpoll_send_skb(np, skb); - local_irq_restore(flags); + netpoll_tx_end(flags); } return ret; }
@@ -501,7 +526,7 @@ int netpoll_send_udp(struct netpoll *np, int total_len, ip_len, udp_len; struct sk_buff *skb; - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
!IS_ENABLED(CONFIG_NETCONSOLE_NBCON)) WARN_ON_ONCE(!irqs_disabled()); udp_len = len + sizeof(struct udphdr);