Re: [Bugme-new] [Bug 16626] New: Machine hangs with EIP at skb_copy_and_csum_dev
From: Eric Dumazet <hidden>
Date: 2010-08-24 15:08:58
Subsystem:
networking [general], the rest · Maintainers:
"David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
Le mardi 24 août 2010 à 16:27 +0300, Plamen Petrov a écrit :
The current status: if I enable GRO on the tg3 - the kernel oopses. It just takes a different amount of time to trigger: somewhere from 30 seconds to 30 minutes. The oopses look the same, and here are the latest: [picture 13] http://picpaste.com/c8dbda8f5c15d9ce3e050dd7f245f5d0.jpg [picture 14] http://picpaste.com/646cca586b704c5b72d3cf9fa54c7344.jpg I was wondering which debug options could help us track this down?
Thanks, here is an updated patch (against linux-2.6)
diff --git a/net/core/dev.c b/net/core/dev.c
index 3721fbb..77c8eb7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1935,6 +1935,32 @@ static inline int skb_needs_linearize(struct sk_buff *skb, illegal_highdma(dev, skb)))); } +int skb_csum_start_bug(const struct sk_buff *skb, int pos) +{ + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + long csstart; + + csstart = skb->csum_start - skb_headroom(skb); + if (WARN_ON(csstart > skb_headlen(skb))) { + int i; + + pr_err("%d: csum_start %u, offset %u, headroom %d, headlen %d, len %d\n", + pos, skb->csum_start, skb->csum_offset, skb_headroom(skb), + skb_headlen(skb), skb->len); + pr_err("nr_frags=%u gso_size=%u ", + skb_shinfo(skb)->nr_frags, + skb_shinfo(skb)->gso_size); + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + pr_err("frag_size=%u ", skb_shinfo(skb)->frags[i].size); + } + pr_err("\n"); + return 1; + } + } + return 0; +} + int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) {
@@ -1959,11 +1985,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto out_kfree_skb; if (skb->next) goto gso; + if (skb_csum_start_bug(skb, 10)) + goto out_kfree_skb; } else { if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) goto out_kfree_skb; + if (skb_csum_start_bug(skb, 20)) + goto out_kfree_skb; /* If packet is not checksummed and device does not * support checksumming for this protocol, complete * checksumming here.
@@ -1974,10 +2004,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) goto out_kfree_skb; + if (skb_csum_start_bug(skb, 30)) + goto out_kfree_skb; } } - rc = ops->ndo_start_xmit(skb, dev); + if (skb_csum_start_bug(skb, 40)) { + kfree_skb(skb); + rc = NETDEV_TX_OK; + } else + rc = ops->ndo_start_xmit(skb, dev); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc;
@@ -1997,7 +2033,12 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); - rc = ops->ndo_start_xmit(nskb, dev); + if (skb_csum_start_bug(skb, 50)) { + kfree_skb(skb); + rc = NETDEV_TX_OK; + } else + rc = ops->ndo_start_xmit(nskb, dev); + if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3a2513f..3d54a1b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1824,13 +1824,15 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; long csstart; + extern int skb_csum_start_bug(const struct sk_buff *skb, int pos); if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb->csum_start - skb_headroom(skb); else csstart = skb_headlen(skb); - BUG_ON(csstart > skb_headlen(skb)); + if (skb_csum_start_bug(skb, 100)) + return; skb_copy_from_linear_data(skb, to, csstart);