Re: [patch,rfc] allow registration of multiple netpolls per interface
From: Jeff Moyer <hidden>
Date: 2005-06-22 21:16:36
Also in:
lkml
==> Regarding Re: [patch,rfc] allow registration of multiple netpolls per interface; Matt Mackall [off-list ref] adds: mpm> On Wed, Jun 22, 2005 at 07:47:37AM -0400, Jeff Moyer wrote: mpm> Hmm. It's conceivable we'll want netdump and kgdb on the same mpm> interface so we'll have to address this eventually..
quoted
Well, do you want to address it eventually, or now? As I said, it's never bitten anyone before.
mpm> Later's fine. I just don't want to design it out by accident again. OK.
quoted
quoted
quoted
struct netpoll_info { spinlock_t poll_lock; int poll_owner; int rx_flags; spinlock_t rx_lock; struct netpoll *rx_np; /* netpoll that registered an rx_hook */ };
[snip] mpm> It might be simpler to have a single lock here..?
quoted
Maybe. You can't really have netpoll code running on multiple cpus at the same time, right? This is the rx path, remember, so the other cpu should be spinning on the poll_lock. Keeping separate locks would allow you to unregister a struct netpoll associated with another net device without causing lock contention. This is a very minor win, obviously. I still feel like this npinfo struct is the right place for this, though. If you're strongly opposed to that, I'll change it.
mpm> No, certainly having it in npinfo makes sense. I just was wondering if mpm> we really need two locks in there. Oh, I misunderstood. Well, one protects recursing into the driver's poll routine, the other protects access to the np_rx pointer, which may later become a list. I don't think we can lump these two together, do you?
quoted
quoted
quoted
+ spin_lock_irqsave(&npinfo->rx_lock, flags); + if (npinfo->rx_np->dev == skb->dev) + np = npinfo->rx_np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags);
mpm> And I think that means we don't need the lock here either.
quoted
Sure we do. We need to protect against rmmod's.
mpm> How can we have an rmmmod when we're trapped? Looking over the code, I don't see what would prevent this. Could you point me the code which prevents this? It's a good thing we're discussing it, since I found that I didn't take the lock in netpoll_cleanup. Okay, so here's the full patch again, with the changes we've discussed. I've also included an interdiff. As you can see, the first version I sent didn't have some basic compile fixes, sorry about that. Anyway, I have booted and tested this version with multiple netpoll clients. Barring any negative feedback, I'll break this up and send it as a patch series. Thanks, Jeff (Interdiff first) diff -u linux-2.6.12-rc6/net/core/netpoll.c linux-2.6.12-rc6/net/core/netpoll.c
--- linux-2.6.12-rc6/net/core/netpoll.c 2005-06-21 16:03:22.409620400 -0400
+++ linux-2.6.12-rc6/net/core/netpoll.c 2005-06-22 16:51:24.336062231 -0400@@ -130,8 +130,8 @@ */ static void poll_napi(struct netpoll *np) { - int budget = 16; struct netpoll_info *npinfo = np->dev->npinfo; + int budget = 16; if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && npinfo->poll_owner != smp_processor_id() &&
@@ -344,22 +344,22 @@ static void arp_reply(struct sk_buff *skb) { + struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; + unsigned long flags; struct sk_buff *send_skb; - struct netpoll *np; - struct netpoll_info *npinfo = skb->dev->npinfo; - - if (!npinfo) return; + struct netpoll *np = NULL; spin_lock_irqsave(&npinfo->rx_lock, flags); - if (npinfo->rx_np->dev == skb->dev) + if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) np = npinfo->rx_np; spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (!np) return; + if (!np) + return; /* No arp on this interface */ if (skb->dev->flags & IFF_NOARP)
@@ -716,7 +716,7 @@ spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; npinfo->rx_np = np; - spin_unlock_irqsave(&npinfo->rx_lock, flags); + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } /* last thing to do is link it to the net device structure */ ndev->npinfo = npinfo;
@@ -734,12 +734,15 @@ void netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; + unsigned long flags; if (np->dev) { npinfo = np->dev->npinfo; if (npinfo && npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_np = NULL; npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } dev_put(np->dev); }
reverted:
--- linux-2.6.12-rc6/net/core/dev.c 2005-06-21 13:53:51.583407710 -0400
+++ linux-2.6.12-rc6/net/core/dev.c.orig 2005-06-20 19:51:59.000000000 -0400@@ -1656,7 +1656,6 @@ unsigned short type; /* if we've gotten here through NAPI, check netpoll */ - /* how else can we get here? --phro */ if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP;
And now, the full diff:
--- linux-2.6.12-rc6/net/core/netpoll.c.orig 2005-06-20 19:51:56.000000000 -0400
+++ linux-2.6.12-rc6/net/core/netpoll.c 2005-06-22 16:51:24.336062231 -0400@@ -130,19 +130,20 @@ static int checksum_udp(struct sk_buff * */ static void poll_napi(struct netpoll *np) { + struct netpoll_info *npinfo = np->dev->npinfo; int budget = 16; if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - np->poll_owner != smp_processor_id() && - spin_trylock(&np->poll_lock)) { - np->rx_flags |= NETPOLL_RX_DROP; + npinfo->poll_owner != smp_processor_id() && + spin_trylock(&npinfo->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); np->dev->poll(np->dev, &budget); atomic_dec(&trapped); - np->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&np->poll_lock); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&npinfo->poll_lock); } }
@@ -245,6 +246,7 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status; + struct netpoll_info *npinfo; repeat: if(!np || !np->dev || !netif_running(np->dev)) {
@@ -253,7 +255,8 @@ repeat: } /* avoid recursion */ - if(np->poll_owner == smp_processor_id() || + npinfo = np->dev->npinfo; + if(npinfo->poll_owner == smp_processor_id() || np->dev->xmit_lock_owner == smp_processor_id()) { if (np->drop) np->drop(skb);
@@ -341,14 +344,22 @@ void netpoll_send_udp(struct netpoll *np static void arp_reply(struct sk_buff *skb) { + struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; + unsigned long flags; struct sk_buff *send_skb; - struct netpoll *np = skb->dev->np; + struct netpoll *np = NULL; + + spin_lock_irqsave(&npinfo->rx_lock, flags); + if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) + np = npinfo->rx_np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (!np) return; + if (!np) + return; /* No arp on this interface */ if (skb->dev->flags & IFF_NOARP)
@@ -429,9 +440,9 @@ int __netpoll_rx(struct sk_buff *skb) int proto, len, ulen; struct iphdr *iph; struct udphdr *uh; - struct netpoll *np = skb->dev->np; + struct netpoll *np = skb->dev->npinfo->rx_np; - if (!np->rx_hook) + if (!np) goto out; if (skb->dev->type != ARPHRD_ETHER) goto out;
@@ -611,9 +622,8 @@ int netpoll_setup(struct netpoll *np) { struct net_device *ndev = NULL; struct in_device *in_dev; - - np->poll_lock = SPIN_LOCK_UNLOCKED; - np->poll_owner = -1; + struct netpoll_info *npinfo; + unsigned long flags; if (np->dev_name) ndev = dev_get_by_name(np->dev_name);
@@ -624,7 +634,17 @@ int netpoll_setup(struct netpoll *np) } np->dev = ndev; - ndev->np = np; + if (!ndev->npinfo) { + npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); + if (!npinfo) + goto release; + + npinfo->rx_np = NULL; + npinfo->poll_lock = SPIN_LOCK_UNLOCKED; + npinfo->poll_owner = -1; + npinfo->rx_lock = SPIN_LOCK_UNLOCKED; + } else + npinfo = ndev->npinfo; if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
@@ -692,13 +712,20 @@ int netpoll_setup(struct netpoll *np) np->name, HIPQUAD(np->local_ip)); } - if(np->rx_hook) - np->rx_flags = NETPOLL_RX_ENABLED; + if(np->rx_hook) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; + npinfo->rx_np = np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + /* last thing to do is link it to the net device structure */ + ndev->npinfo = npinfo; return 0; release: - ndev->np = NULL; + if (!ndev->npinfo) + kfree(npinfo); np->dev = NULL; dev_put(ndev); return -1;
@@ -706,9 +733,20 @@ int netpoll_setup(struct netpoll *np) void netpoll_cleanup(struct netpoll *np) { - if (np->dev) - np->dev->np = NULL; - dev_put(np->dev); + struct netpoll_info *npinfo; + unsigned long flags; + + if (np->dev) { + npinfo = np->dev->npinfo; + if (npinfo && npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + dev_put(np->dev); + } + np->dev = NULL; } --- linux-2.6.12-rc6/include/linux/netpoll.h.orig 2005-06-20 19:51:47.000000000 -0400 +++ linux-2.6.12-rc6/include/linux/netpoll.h 2005-06-21 15:29:48.000000000 -0400
@@ -16,14 +16,19 @@ struct netpoll; struct netpoll { struct net_device *dev; char dev_name[16], *name; - int rx_flags; void (*rx_hook)(struct netpoll *, int, char *, int); void (*drop)(struct sk_buff *skb); u32 local_ip, remote_ip; u16 local_port, remote_port; unsigned char local_mac[6], remote_mac[6]; +}; + +struct netpoll_info { spinlock_t poll_lock; int poll_owner; + int rx_flags; + spinlock_t rx_lock; + struct netpoll *rx_np; /* netpoll that registered an rx_hook */ }; void netpoll_poll(struct netpoll *np);
@@ -39,22 +44,35 @@ void netpoll_queue(struct sk_buff *skb); #ifdef CONFIG_NETPOLL static inline int netpoll_rx(struct sk_buff *skb) { - return skb->dev->np && skb->dev->np->rx_flags && __netpoll_rx(skb); + struct netpoll_info *npinfo = skb->dev->npinfo; + unsigned long flags; + int ret = 0; + + if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) + return 0; + + spin_lock_irqsave(&npinfo->rx_lock, flags); + /* check rx_flags again with the lock held */ + if (npinfo->rx_flags && __netpoll_rx(skb)) + ret = 1; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + + return ret; } static inline void netpoll_poll_lock(struct net_device *dev) { - if (dev->np) { - spin_lock(&dev->np->poll_lock); - dev->np->poll_owner = smp_processor_id(); + if (dev->npinfo) { + spin_lock(&dev->npinfo->poll_lock); + dev->npinfo->poll_owner = smp_processor_id(); } } static inline void netpoll_poll_unlock(struct net_device *dev) { - if (dev->np) { - spin_unlock(&dev->np->poll_lock); - dev->np->poll_owner = -1; + if (dev->npinfo) { + dev->npinfo->poll_owner = -1; + spin_unlock(&dev->npinfo->poll_lock); } } --- linux-2.6.12-rc6/include/linux/netdevice.h.orig 2005-06-20 20:26:21.000000000 -0400 +++ linux-2.6.12-rc6/include/linux/netdevice.h 2005-06-21 14:46:52.000000000 -0400
@@ -41,7 +41,7 @@ struct divert_blk; struct vlan_group; struct ethtool_ops; -struct netpoll; +struct netpoll_info; /* source back-compat hooks */ #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) )
@@ -468,7 +468,7 @@ struct net_device unsigned char *haddr); int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL - struct netpoll *np; + struct netpoll_info *npinfo; #endif #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev);