Re: [PATCH net-next v3] xen-netfront: Add support for IPv6 offloads

From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: 2013-12-31 19:09:48

On 11/26/2013 11:41 AM, Paul Durrant wrote:

This patch adds support for IPv6 checksum offload and GSO when those
features are available in the backend.

Sorry for late review. Mostly style comments.

quoted hunk ↗ jump to hunk

Signed-off-by: Paul Durrant <redacted>
Cc: Konrad Rzeszutek Wilk <redacted>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <redacted>
Cc: Ian Campbell <redacted>
Cc: Wei Liu <redacted>
Cc: Annie Li <redacted>
---

v3:
  - Addressed comments raised by Annie Li

v2:
  - Addressed comments raised by Ian Campbell

  drivers/net/xen-netfront.c |  239 ++++++++++++++++++++++++++++++++++++++++----
  include/linux/ipv6.h       |    2 +
  2 files changed, 224 insertions(+), 17 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index dd1011e..fe747e4 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c

@@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
  		tx->flags |= XEN_NETTXF_extra_info;
  
  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
-		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
+			          XEN_NETIF_GSO_TYPE_TCPV6 :
+			          XEN_NETIF_GSO_TYPE_TCPV4;
  		gso->u.gso.pad = 0;
  		gso->u.gso.features = 0;

@@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
  		return -EINVAL;
  	}
  
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
+	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
  		if (net_ratelimit())
  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
  		return -EINVAL;
  	}
  
  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb)->gso_type =
+		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
+		SKB_GSO_TCPV4 :
+		SKB_GSO_TCPV6;
  
  	/* Header must be checked, and gso_segs computed. */
  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;

@@ -856,11 +861,42 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np,
  	return cons;
  }
  
-static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+static inline bool maybe_pull_tail(struct sk_buff *skb, unsigned int min,
+				   unsigned int max)

Should this routine return error code instead of a boolean? Otherwise 
it's not clear what "false" should mean --- whether it is that it failed 
to pull or that the pull wasn't needed.

  {
-	struct iphdr *iph;
-	int err = -EPROTO;
+	int target;
+
+	BUG_ON(max < min);
+
+	if (!skb_is_nonlinear(skb) || skb_headlen(skb) >= min)
+		return true;
+
+	/* If we need to pullup then pullup to max, so we hopefully
+	 * won't need to do it again.
+	 */

Comment style.

+	target = min_t(int, skb->len, max);
+	__pskb_pull_tail(skb, target - skb_headlen(skb));
+
+	if (skb_headlen(skb) < min) {

Why not explicitly check whether__pskb_pull_tail() returned NULL ?

+		net_err_ratelimited("Failed to pullup packet header\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* This value should be large enough to cover a tagged ethernet header plus
+ * maximally sized IP and TCP or UDP headers.
+ */

Comment style.

quoted hunk ↗ jump to hunk

+#define MAX_IP_HEADER 128
+
+static int checksum_setup_ip(struct net_device *dev, struct sk_buff *skb)
+{
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
  	int recalculate_partial_csum = 0;
+	int err = -EPROTO;
  
  	/*
  	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy

@@ -879,40 +915,158 @@ static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
  	if (skb->ip_summed != CHECKSUM_PARTIAL)
  		return 0;
  
-	if (skb->protocol != htons(ETH_P_IP))
+	off = sizeof(struct iphdr);
+
+	header_size = skb->network_header + off;
+	if (!maybe_pull_tail(skb, header_size, MAX_IP_HEADER))
  		goto out;
  
-	iph = (void *)skb->data;
+	off = iph->ihl * 4;
  
  	switch (iph->protocol) {
  	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
  					  offsetof(struct tcphdr, check)))
  			goto out;
  
  		if (recalculate_partial_csum) {
  			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);

You can put these (off and sizeof) onto the same line.

+			if (!maybe_pull_tail(skb, header_size, MAX_IP_HEADER))
+				goto out;
+
  			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
  							 IPPROTO_TCP, 0);
  		}
  		break;
  	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
  					  offsetof(struct udphdr, check)))
  			goto out;
  
  		if (recalculate_partial_csum) {
  			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			if (!maybe_pull_tail(skb, header_size, MAX_IP_HEADER))
+				goto out;
+
  			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
  							 IPPROTO_UDP, 0);
  		}
  		break;
  	default:
-		if (net_ratelimit())
-			pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
-			       iph->protocol);
+		net_err_ratelimited("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				    iph->protocol);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+/* This value should be large enough to cover a tagged ethernet header plus
+ * an IPv6 header, all options, and a maximal TCP or UDP header.
+ */
+#define MAX_IPV6_HEADER 256
+
+static int checksum_setup_ipv6(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment;
+	bool done;
+	int err = -EPROTO;
+
+	done = false;

This should probably be moved down to the beginning of the while loop. 
And you also need to initialize fragment to "false" (and possibly rename 
it to is_fragment?)

+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	if (!maybe_pull_tail(skb, header_size, MAX_IPV6_HEADER))
+		goto out;
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);

I'd merge the last two lines.

+			if (!maybe_pull_tail(skb, header_size, MAX_IPV6_HEADER))
+				goto out;
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);

Here as well.

quoted hunk ↗ jump to hunk

+			if (!maybe_pull_tail(skb, header_size, MAX_IPV6_HEADER))
+				goto out;
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_ahlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		net_err_ratelimited("Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		net_err_ratelimited("Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+		break;
+	default:
+		net_err_ratelimited("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				    nexthdr);
  		goto out;
  	}

@@ -922,6 +1076,25 @@ out:
  	return err;
  }
  
+static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+{
+	int err;

Initialize to -EPROTO (just to keep consistent with the rest of the file)

quoted hunk ↗ jump to hunk

+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		err = checksum_setup_ip(dev, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		err = checksum_setup_ipv6(dev, skb);
+		break;
+	default:
+		err = -EPROTO;
+		break;
+	}
+
+	return err;
+}
+
  static int handle_incoming_queue(struct net_device *dev,
  				 struct sk_buff_head *rxq)
  {

@@ -1232,6 +1405,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
  			features &= ~NETIF_F_SG;
  	}
  
+	if (features & NETIF_F_IPV6_CSUM) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-ipv6-csum-offload", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_IPV6_CSUM;
+	}
+
  	if (features & NETIF_F_TSO) {
  		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
  				 "feature-gso-tcpv4", "%d", &val) < 0)
@@ -1241,6 +1423,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
  			features &= ~NETIF_F_TSO;
  	}
  
+	if (features & NETIF_F_TSO6) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-gso-tcpv6", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_TSO6;
+	}
+
  	return features;
  }
  
@@ -1373,7 +1564,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
  	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
  	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
  				  NETIF_F_GSO_ROBUST;
-	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
+	netdev->hw_features	= NETIF_F_SG |
+		                  NETIF_F_IPV6_CSUM |
+		                  NETIF_F_TSO | NETIF_F_TSO6;

Can you merge these three lines and stay under 80? If not, merge either 
of the two of them.


-boris

quoted hunk ↗ jump to hunk

  
  	/*
           * Assume that all hw features are available for now. This set

@@ -1751,6 +1944,18 @@ again:
  		goto abort_transaction;
  	}
  
+	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload", "%d", 1);
+	if (err) {
+		message = "writing feature-ipv6-csum-offload";
+		goto abort_transaction;
+	}
+
  	err = xenbus_transaction_end(xbt, 0);
  	if (err) {
  		if (err == -EAGAIN)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5d89d1b..10f1b03 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h

@@ -4,6 +4,8 @@
  #include <uapi/linux/ipv6.h>
  
  #define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
+#define ipv6_ahlen(p)   (((p)->hdrlen+2) << 2);
+
  /*
   * This structure contains configuration options per IPv6 link.
   */

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help