Thread (10 messages) 10 messages, 3 authors, 2006-10-24

Re: [RFC]: map 4K iommu pages even on 64K largepage systems.

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: 2006-10-24 04:43:39

Haven't yet thought if this is a good long-term solution or not,
whether this kind of thing is desirable or not.  That's why its 
an RFC.  Comments?
Ok, after some time reading the details of the patch, it looks fairly
good, just a few nits. Also, we need to fix the DART code still :)

Ben

 
+static inline unsigned long
+iommu_num_pages(unsigned long vaddr, unsigned long slen)
Coding style in that file is to do:

static inline unsigned long iommu_num_pages(unsigned long vaddr,
					    unsigned long slen)

quoted hunk ↗ jump to hunk
@@ -557,8 +562,8 @@ void iommu_unmap_single(struct iommu_tab
 	BUG_ON(direction == DMA_NONE);
 
 	if (tbl)
-		iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
-					(dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
+		iommu_free(tbl, dma_handle, (IOMMU_PAGE_ALIGN(dma_handle + size) -
+					(dma_handle & IOMMU_PAGE_MASK)) >> IOMMU_PAGE_SHIFT);
 }
Use iommu_num_pages ?
quoted hunk ↗ jump to hunk
 /* Allocates a contiguous real buffer and creates mappings over it.
@@ -571,6 +576,7 @@ void *iommu_alloc_coherent(struct iommu_
 	void *ret = NULL;
 	dma_addr_t mapping;
 	unsigned int npages, order;
+	unsigned int nio_pages, io_order;
 	struct page *page;
 
 	size = PAGE_ALIGN(size);
@@ -598,8 +604,10 @@ void *iommu_alloc_coherent(struct iommu_
 	memset(ret, 0, size);
 
 	/* Set up tces to cover the allocated range */
-	mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL,
-			      mask >> PAGE_SHIFT, order);
+	nio_pages = size >> IOMMU_PAGE_SHIFT;
+	io_order = get_iommu_order(size);
Not directly related to your patch, but do we really have this
requirement of allocating with an alignement of the order of the
alloation size here ?

Also, your code assumes that PAGE_SHIFT >= IOMMU_PAGE_SHIFT... it might
be useful to not have this restriction in fact. Just maybe use
iommu_num_pages(size) instead of size >> IOMMU_PAGE_SHIFT here ...
quoted hunk ↗ jump to hunk
+	mapping = iommu_alloc(tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
+			      mask >> IOMMU_PAGE_SHIFT, io_order);
 	if (mapping == DMA_ERROR_CODE) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
@@ -614,9 +622,10 @@ void iommu_free_coherent(struct iommu_ta
 	unsigned int npages;
 
 	if (tbl) {
-		size = PAGE_ALIGN(size);
-		npages = size >> PAGE_SHIFT;
+		size = IOMMU_PAGE_ALIGN(size);
+		npages = size >> IOMMU_PAGE_SHIFT;

 		iommu_free(tbl, dma_handle, npages);
+		size = PAGE_ALIGN(size);
 		free_pages((unsigned long)vaddr, get_order(size));
 	}
All those repeated alignments on size and one loses track... Are you
sure you'll always end up with the proper values ? I'd rather use
iommu_num_pages() for the iommu_free and only align size once with
PAGE_ALIGN for free_pages().
quoted hunk ↗ jump to hunk
 }
Index: linux-2.6.19-rc1-git11/include/asm-powerpc/iommu.h
===================================================================
--- linux-2.6.19-rc1-git11.orig/include/asm-powerpc/iommu.h	2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.19-rc1-git11/include/asm-powerpc/iommu.h	2006-10-23 18:28:21.000000000 -0500
@@ -23,16 +23,41 @@
 #ifdef __KERNEL__
 
 #include <asm/types.h>
+#include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 
+#define IOMMU_PAGE_SHIFT      12
+#define IOMMU_PAGE_SIZE       (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
+#define IOMMU_PAGE_MASK       (~((1 << IOMMU_PAGE_SHIFT) - 1))
+#define IOMMU_PAGE_ALIGN(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+
+/* Pure 2^n version of get_order */
+static __inline__ __attribute_const__ int get_iommu_order(unsigned long size)
+{
+	int order;
+
+	size = (size - 1) >> (IOMMU_PAGE_SHIFT - 1);
+	order = -1;
+	do {
+		size >>= 1;
+		order++;
+	} while (size);
+	return order;
+}
+
+#endif   /* __ASSEMBLY__ */
Use cntlzw... or ilog2, which would give something like :

	return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT) + 1;

I think Dave Howells has patches fixing get_order generically to be
implemented in terms of ilog2. (I just discovered that on ppc64, we
didn't use cntlzw/ilog2 for it, sucks, but let's wait for Dave patches
rather than fixing it ourselves now).
+
 /*
  * IOMAP_MAX_ORDER defines the largest contiguous block
  * of dma space we can get.  IOMAP_MAX_ORDER = 13
  * allows up to 2**12 pages (4096 * 4096) = 16 MB
  */
-#define IOMAP_MAX_ORDER 13
+#define IOMAP_MAX_ORDER (25 - PAGE_SHIFT)
I'm not completely sure we want the above to be defined as a function of
the page shift... 
quoted hunk ↗ jump to hunk
 struct iommu_table {
 	unsigned long  it_busno;     /* Bus number this table belongs to */
Index: linux-2.6.19-rc1-git11/include/asm-powerpc/tce.h
===================================================================
--- linux-2.6.19-rc1-git11.orig/include/asm-powerpc/tce.h	2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.19-rc1-git11/include/asm-powerpc/tce.h	2006-10-23 15:46:57.000000000 -0500
@@ -22,6 +22,8 @@
 #define _ASM_POWERPC_TCE_H
 #ifdef __KERNEL__
 
+#include <asm/iommu.h>
+
 /*
  * Tces come in two formats, one for the virtual bus and a different
  * format for PCI
@@ -33,7 +35,7 @@
 
 #define TCE_SHIFT	12
 #define TCE_PAGE_SIZE	(1 << TCE_SHIFT)
-#define TCE_PAGE_FACTOR	(PAGE_SHIFT - TCE_SHIFT)
+#define TCE_PAGE_FACTOR	(IOMMU_PAGE_SHIFT - TCE_SHIFT)
 
 #define TCE_ENTRY_SIZE		8		/* each TCE is 64 bits */
We can actually remove TCE_PAGE_FACTOR and code using it completely.

With your patch, TCE_SHIFT == IOMMU_PAGE_SHIFT, and in a second step, we
can start having the iommu_page_shift be a member of iommu_table. Thus
there will no longer be any need for a correction factor in the backend.
quoted hunk ↗ jump to hunk
Index: linux-2.6.19-rc1-git11/arch/powerpc/kernel/vio.c
===================================================================
--- linux-2.6.19-rc1-git11.orig/arch/powerpc/kernel/vio.c	2006-10-13 11:52:51.000000000 -0500
+++ linux-2.6.19-rc1-git11/arch/powerpc/kernel/vio.c	2006-10-23 16:06:43.000000000 -0500
@@ -92,9 +92,9 @@ static struct iommu_table *vio_build_iom
 				&tbl->it_index, &offset, &size);
 
 		/* TCE table size - measured in tce entries */
-		tbl->it_size = size >> PAGE_SHIFT;
+		tbl->it_size = size >> IOMMU_PAGE_SHIFT;
 		/* offset for VIO should always be 0 */
-		tbl->it_offset = offset >> PAGE_SHIFT;
+		tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
 		tbl->it_busno = 0;
 		tbl->it_type = TCE_VB;
 
Index: linux-2.6.19-rc1-git11/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- linux-2.6.19-rc1-git11.orig/arch/powerpc/platforms/pseries/iommu.c	2006-10-13 11:54:00.000000000 -0500
+++ linux-2.6.19-rc1-git11/arch/powerpc/platforms/pseries/iommu.c	2006-10-23 18:42:03.000000000 -0500
@@ -289,7 +289,7 @@ static void iommu_table_setparms(struct 
 	tbl->it_busno = phb->bus->number;
 
 	/* Units of tce entries */
-	tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
+	tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT;
 
 	/* Test if we are going over 2GB of DMA space */
 	if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
@@ -300,7 +300,7 @@ static void iommu_table_setparms(struct 
 	phb->dma_window_base_cur += phb->dma_window_size;
 
 	/* Set the tce table size - measured in entries */
-	tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
+	tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT;
 
 	tbl->it_index = 0;
 	tbl->it_blocksize = 16;
@@ -325,8 +325,8 @@ static void iommu_table_setparms_lpar(st
 	tbl->it_base   = 0;
 	tbl->it_blocksize  = 16;
 	tbl->it_type = TCE_PCI;
-	tbl->it_offset = offset >> PAGE_SHIFT;
-	tbl->it_size = size >> PAGE_SHIFT;
+	tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
+	tbl->it_size = size >> IOMMU_PAGE_SHIFT;
 }
 
 static void iommu_bus_setup_pSeries(struct pci_bus *bus)
@@ -522,8 +522,6 @@ static void iommu_dev_setup_pSeriesLP(st
 	const void *dma_window = NULL;
 	struct pci_dn *pci;
 
-	DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
-
 	/* dev setup for LPAR is a little tricky, since the device tree might
 	 * contain the dma-window properties per-device and not neccesarily
 	 * for the bus. So we need to search upwards in the tree until we
@@ -532,6 +530,9 @@ static void iommu_dev_setup_pSeriesLP(st
 	 */
 	dn = pci_device_to_OF_node(dev);
 
+	DBG("iommu_dev_setup_pSeriesLP, dev %p (%s) %s\n",
+	     dev, pci_name(dev), dn->full_name);
+
 	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
 	     pdn = pdn->parent) {
 		dma_window = get_property(pdn, "ibm,dma-window", NULL);
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help