Thread (39 messages), 5 authors, 2010-08-29

Re: [Bugme-new] [Bug 16626] New: Machine hangs with EIP at skb_copy_and_csum_dev

From: Eric Dumazet <hidden>
Date: 2010-08-24 15:08:58
Subsystem: networking [general], the rest · Maintainers: "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds

Le mardi 24 août 2010 à 16:27 +0300, Plamen Petrov a écrit :
> The current status: if I enable GRO on the tg3 - the kernel oopses.
> It just takes a different amount of time to trigger: somewhere from
> 30 seconds to 30 minutes.
>
> The oopses look the same, and here are the latest:
>
> [picture 13]
> http://picpaste.com/c8dbda8f5c15d9ce3e050dd7f245f5d0.jpg
>
> [picture 14]
> http://picpaste.com/646cca586b704c5b72d3cf9fa54c7344.jpg
>
> I was wondering which debug options could help us track this down?

Thanks, here is an updated patch (against linux-2.6)
diff --git a/net/core/dev.c b/net/core/dev.c
index 3721fbb..77c8eb7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1935,6 +1935,32 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
 					      illegal_highdma(dev, skb))));
 }
 
+/* Debug aid: for a CHECKSUM_PARTIAL skb, WARN and dump its layout (tagged with
+ * call-site id @pos) if csum_start lies past the linear head; returns 1 then. */
+int skb_csum_start_bug(const struct sk_buff *skb, int pos)
+{
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		long csstart;
+
+		csstart = skb->csum_start - skb_headroom(skb);
+		if (WARN_ON(csstart > skb_headlen(skb))) {
+			int i;
+
+			pr_err("%d: csum_start %u, offset %u, headroom %u, headlen %u, len %u\n",
+				   pos, skb->csum_start, skb->csum_offset, skb_headroom(skb),
+				   skb_headlen(skb), skb->len);
+			pr_err("nr_frags=%u gso_size=%u ",
+					skb_shinfo(skb)->nr_frags,
+					skb_shinfo(skb)->gso_size);
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+				pr_cont("frag_size=%u ", skb_shinfo(skb)->frags[i].size);
+			pr_cont("\n");
+			return 1;
+		}
+	}
+	return 0;
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -1959,11 +1985,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				goto out_kfree_skb;
 			if (skb->next)
 				goto gso;
+			if (skb_csum_start_bug(skb, 10))
+				goto out_kfree_skb;
 		} else {
 			if (skb_needs_linearize(skb, dev) &&
 			    __skb_linearize(skb))
 				goto out_kfree_skb;
 
+			if (skb_csum_start_bug(skb, 20))
+				goto out_kfree_skb;
 			/* If packet is not checksummed and device does not
 			 * support checksumming for this protocol, complete
 			 * checksumming here.
@@ -1974,10 +2004,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				if (!dev_can_checksum(dev, skb) &&
 				     skb_checksum_help(skb))
 					goto out_kfree_skb;
+				if (skb_csum_start_bug(skb, 30))
+					goto out_kfree_skb;
 			}
 		}
 
-		rc = ops->ndo_start_xmit(skb, dev);
+		if (skb_csum_start_bug(skb, 40)) {
+			kfree_skb(skb);
+			rc = NETDEV_TX_OK;
+		} else
+			rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
 		return rc;
@@ -1997,7 +2033,12 @@ gso:
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(nskb);
 
-		rc = ops->ndo_start_xmit(nskb, dev);
+		if (skb_csum_start_bug(nskb, 50)) {	/* validate the segment about to be sent */
+			kfree_skb(nskb);	/* drop only the segment that would have been sent */
+			rc = NETDEV_TX_OK;
+		} else
+			rc = ops->ndo_start_xmit(nskb, dev);
+
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
 				goto out_kfree_gso_skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3a2513f..3d54a1b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1824,13 +1824,15 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 {
 	__wsum csum;
 	long csstart;
+	extern int skb_csum_start_bug(const struct sk_buff *skb, int pos);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		csstart = skb->csum_start - skb_headroom(skb);
 	else
 		csstart = skb_headlen(skb);
 
-	BUG_ON(csstart > skb_headlen(skb));
+	if (skb_csum_start_bug(skb, 100))
+		return;
 
 	skb_copy_from_linear_data(skb, to, csstart);
 

Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help