Thread (10 messages) 10 messages, 3 authors, 2010-02-07

Re: [RFC PATCH 1/2] USB: add HCD_BOUNCE_BUFFERS host controller driver flag

From: Alan Stern <stern@rowland.harvard.edu>
Date: 2010-02-03 19:46:53

On Wed, 3 Feb 2010, Albert Herranz wrote:
The HCD_BOUNCE_BUFFERS USB host controller driver flag can be enabled
to instruct the USB stack to always bounce USB buffers to/from coherent
memory buffers _just_ before/after a host controller transmission.

This setting allows overcoming some platform-specific limitations.

For example, the Nintendo Wii video game console is a NOT_COHERENT_CACHE
platform that is unable to safely perform non-32 bit uncached writes
to RAM because the byte enables are not connected to the bus.
Thus, in that platform, "coherent" DMA buffers cannot be directly used
by the kernel code unless it guarantees that all write accesses
to said buffers are done in 32 bit chunks (which is not the case in the
USB subsystem).

To avoid this unwanted behaviour HCD_BOUNCE_BUFFERS can be enabled at
the HCD controller, causing buffer allocations to be satisfied from
normal memory and, only at the very last moment, before the actual
transfer, buffers get copied to/from their corresponding DMA coherent
bounce buffers.

Note that HCD_LOCAL_MEM doesn't help in solving this problem as in that
case buffers may be allocated from coherent memory in the first place
and thus potentially accessed in non-32 bit chuncks by USB drivers.
This description sounds hopelessly confused.  Maybe you're just
misusing the term "coherent".  The patch itself doesn't affect the
coherent DMA mappings anyway; it affects the streaming mappings.  Or to
put it another way, what's the justification for replacing a call to
dma_map_single() with a call to dma_alloc_coherent()?

Since the patch doesn't affect any of the coherent mappings (see for 
example the calls to dma_pool_create() in ehci-mem.c), I don't see how 
it can possibly do what you claim.
+/**
+ * hcd_memcpy32_to_coherent - copy data to a bounce buffer
+ * @dst: destination dma bounce buffer
+ * @src: source buffer
+ * @len: number of bytes to copy
+ *
+ * This function copies @len bytes from @src to @dst in 32 bit chunks.
+ * The caller must guarantee that @dst length is 4 byte aligned and
+ * that @dst length is greater than or equal to @src length.
+ */
+static void *hcd_memcpy32_to_coherent(void *dst, const void *src, size_t len)
+{
+	u32 *q = dst, *p = (void *)src;
+	u8 *s;
+
+	while (len >= 4) {
+		*q++ = *p++;
+		len -= 4;
+	}
+	s = (u8 *)p;
+	switch (len) {
+	case 3:
+		*q = s[0] << 24 | s[1] << 16 | s[2] << 8;
+		break;
+	case 2:
+		*q = s[0] << 24 | s[1] << 16;
+		break;
+	case 1:
+		*q = s[0] << 24;
+		break;
+	default:
+		break;
+	}
+	return dst;
+}
What happens if somebody tries to use this code on a little-endian CPU?
quoted hunk ↗ jump to hunk
 static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 			   gfp_t mem_flags)
 {
@@ -1274,53 +1441,80 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	if (is_root_hub(urb->dev))
 		return 0;
 
-	if (usb_endpoint_xfer_control(&urb->ep->desc)
-	    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
-		if (hcd->self.uses_dma) {
-			urb->setup_dma = dma_map_single(
-					hcd->self.controller,
-					urb->setup_packet,
-					sizeof(struct usb_ctrlrequest),
-					DMA_TO_DEVICE);
-			if (dma_mapping_error(hcd->self.controller,
-						urb->setup_dma))
-				return -EAGAIN;
-		} else if (hcd->driver->flags & HCD_LOCAL_MEM)
-			ret = hcd_alloc_coherent(
-					urb->dev->bus, mem_flags,
+	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
+		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
+			if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
+				urb->setup_dma = 0;
+			ret = hcd_bounce_to_coherent(
+					hcd->self.controller, mem_flags,
 					&urb->setup_dma,
 					(void **)&urb->setup_packet,
 					sizeof(struct usb_ctrlrequest),
 					DMA_TO_DEVICE);
+		} else if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
+			if (hcd->self.uses_dma) {
+				urb->setup_dma = dma_map_single(
+						hcd->self.controller,
+						urb->setup_packet,
+						sizeof(struct usb_ctrlrequest),
+						DMA_TO_DEVICE);
+				if (dma_mapping_error(hcd->self.controller,
+						      urb->setup_dma))
+					return -EAGAIN;
+			} else if (hcd->driver->flags & HCD_LOCAL_MEM)
+				ret = hcd_alloc_coherent(
+						urb->dev->bus, mem_flags,
+						&urb->setup_dma,
+						(void **)&urb->setup_packet,
+						sizeof(struct usb_ctrlrequest),
+						DMA_TO_DEVICE);
+		}
 	}
 
 	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	if (ret == 0 && urb->transfer_buffer_length != 0
-	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
-		if (hcd->self.uses_dma) {
-			urb->transfer_dma = dma_map_single (
-					hcd->self.controller,
-					urb->transfer_buffer,
-					urb->transfer_buffer_length,
-					dir);
-			if (dma_mapping_error(hcd->self.controller,
-						urb->transfer_dma))
-				return -EAGAIN;
-		} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
-			ret = hcd_alloc_coherent(
-					urb->dev->bus, mem_flags,
+	if (ret == 0 && urb->transfer_buffer_length != 0) {
+		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
+			if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP))
+				urb->transfer_dma = 0;
+			ret = hcd_bounce_to_coherent(
+					hcd->self.controller, mem_flags,
 					&urb->transfer_dma,
 					&urb->transfer_buffer,
 					urb->transfer_buffer_length,
 					dir);
 
-			if (ret && usb_endpoint_xfer_control(&urb->ep->desc)
-			    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
-				hcd_free_coherent(urb->dev->bus,
-					&urb->setup_dma,
-					(void **)&urb->setup_packet,
-					sizeof(struct usb_ctrlrequest),
-					DMA_TO_DEVICE);
+			if (ret && usb_endpoint_xfer_control(&urb->ep->desc))
+				hcd_bounce_from_coherent(hcd->self.controller,
+						&urb->setup_dma,
+						(void **)&urb->setup_packet,
+						sizeof(struct usb_ctrlrequest),
+						DMA_TO_DEVICE);
+		} else if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
+			if (hcd->self.uses_dma) {
+				urb->transfer_dma = dma_map_single(
+						hcd->self.controller,
+						urb->transfer_buffer,
+						urb->transfer_buffer_length,
+						dir);
+				if (dma_mapping_error(hcd->self.controller,
+						      urb->transfer_dma))
+					return -EAGAIN;
+			} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
+				ret = hcd_alloc_coherent(
+						urb->dev->bus, mem_flags,
+						&urb->transfer_dma,
+						&urb->transfer_buffer,
+						urb->transfer_buffer_length,
+						dir);
+
+				if (ret &&
+				    usb_endpoint_xfer_control(&urb->ep->desc))
+					hcd_free_coherent(urb->dev->bus,
+						&urb->setup_dma,
+						(void **)&urb->setup_packet,
+						sizeof(struct usb_ctrlrequest),
+						DMA_TO_DEVICE);
+			}
 		}
 	}
 	return ret;
It seems that every time somebody comes up with a new kind of 
memory-access restriction, this function grows by a factor of 2.  After 
a few more iterations it will be larger than the rest of the kernel!

There must be a better way to structure the requirements here.

Alan Stern
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help