Inter-revision diff: patch 11

Comparing v7 (message) to v3 (message)

--- v7
+++ v3
@@ -1,107 +1,330 @@
-This replaces multiple calls of kzalloc_node() with a new
-iommu_table_alloc() helper. Right now it calls kzalloc_node() but
-later it will be modified to allocate an iommu_table_group struct with
-a single iommu_table in it.
-
-Later the helper will allocate an iommu_table_group struct which embeds
-the iommu table(s).
+The pnv_pci_ioda_tce_invalidate() helper invalidates the TCE cache. It is
+supposed to be called on IODA1/2 and not called on p5ioc2. It receives
+the start and end host addresses of the TCE table. This approach makes it
+possible to get pnv_pci_ioda_tce_invalidate() unintentionally called on
+p5ioc2. Another issue is that IODA2 needs PCI addresses to invalidate the
+cache. Those can be calculated from host addresses, but since we are going
+to implement multi-level TCE tables, calculating a PCI address from
+a host address might get either tricky or ugly, as the TCE table remains
+flat on the PCI bus but not in RAM.
+
+This defines separate iommu_table_ops callbacks for p5ioc2 and IODA1/2
+PHBs. They all call common pnv_tce_build/pnv_tce_free/pnv_tce_get helpers
+but call a PHB-specific TCE invalidation helper (when needed).
+
+This changes pnv_pci_ioda2_tce_invalidate() to receive a TCE index and
+a number of pages, which are PCI addresses shifted by the IOMMU page shift.
+
+The patch is pretty mechanical and behaviour is not expected to change.
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
 ---
- arch/powerpc/include/asm/iommu.h       |  1 +
- arch/powerpc/kernel/iommu.c            |  9 +++++++++
- arch/powerpc/platforms/powernv/pci.c   |  2 +-
- arch/powerpc/platforms/pseries/iommu.c | 12 ++++--------
- 4 files changed, 15 insertions(+), 9 deletions(-)
-
-diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index d909e2a..eb75726 100644
---- a/arch/powerpc/include/asm/iommu.h
-+++ b/arch/powerpc/include/asm/iommu.h
-@@ -117,6 +117,7 @@ static inline void *get_iommu_table_base(struct device *dev)
- 	return dev->archdata.dma_data.iommu_table_base;
- }
- 
-+extern struct iommu_table *iommu_table_alloc(int node);
- /* Frees table for an individual device node */
- extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
- 
-diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
-index eceb214..b39d00a 100644
---- a/arch/powerpc/kernel/iommu.c
-+++ b/arch/powerpc/kernel/iommu.c
-@@ -710,6 +710,15 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
- 	return tbl;
- }
- 
-+struct iommu_table *iommu_table_alloc(int node)
+ arch/powerpc/platforms/powernv/pci-ioda.c   | 92 ++++++++++++++++++++++-------
+ arch/powerpc/platforms/powernv/pci-p5ioc2.c |  8 ++-
+ arch/powerpc/platforms/powernv/pci.c        | 76 +++++++++---------------
+ arch/powerpc/platforms/powernv/pci.h        |  7 ++-
+ 4 files changed, 110 insertions(+), 73 deletions(-)
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index a33a116..dfc56fc 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -1041,18 +1041,20 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
+ 	}
+ }
+ 
+-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
+-					 struct iommu_table *tbl,
+-					 __be64 *startp, __be64 *endp, bool rm)
++static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
++		unsigned long index, unsigned long npages, bool rm)
+ {
++	struct pnv_ioda_pe *pe = container_of(tbl->it_iommu,
++			struct pnv_ioda_pe, iommu);
+ 	__be64 __iomem *invalidate = rm ?
+ 		(__be64 __iomem *)pe->tce_inval_reg_phys :
+ 		(__be64 __iomem *)tbl->it_index;
+ 	unsigned long start, end, inc;
+ 	const unsigned shift = tbl->it_page_shift;
+ 
+-	start = __pa(startp);
+-	end = __pa(endp);
++	start = __pa((__be64 *)tbl->it_base + index - tbl->it_offset);
++	end = __pa((__be64 *)tbl->it_base + index - tbl->it_offset +
++			npages - 1);
+ 
+ 	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
+ 	if (tbl->it_busno) {
+@@ -1088,10 +1090,40 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
+ 	 */
+ }
+ 
+-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
+-					 struct iommu_table *tbl,
+-					 __be64 *startp, __be64 *endp, bool rm)
++static int pnv_ioda1_tce_build_vm(struct iommu_table *tbl, long index,
++		long npages, unsigned long uaddr,
++		enum dma_data_direction direction,
++		struct dma_attrs *attrs)
+ {
++	long ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
++			attrs);
++
++	if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
++		pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
++
++	return ret;
++}
++
++static void pnv_ioda1_tce_free_vm(struct iommu_table *tbl, long index,
++		long npages)
 +{
-+	struct iommu_table *tbl;
-+
-+	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
-+
-+	return tbl;
++	pnv_tce_free(tbl, index, npages);
++
++	if (tbl->it_type & TCE_PCI_SWINV_FREE)
++		pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
 +}
 +
- void iommu_free_table(struct iommu_table *tbl, const char *node_name)
- {
- 	unsigned long bitmap_sz;
++struct iommu_table_ops pnv_ioda1_iommu_ops = {
++	.set = pnv_ioda1_tce_build_vm,
++	.clear = pnv_ioda1_tce_free_vm,
++	.get = pnv_tce_get,
++};
++
++static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
++		unsigned long index, unsigned long npages, bool rm)
++{
++	struct pnv_ioda_pe *pe = container_of(tbl->it_iommu,
++			struct pnv_ioda_pe, iommu);
+ 	unsigned long start, end, inc;
+ 	__be64 __iomem *invalidate = rm ?
+ 		(__be64 __iomem *)pe->tce_inval_reg_phys :
+@@ -1104,9 +1136,9 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
+ 	end = start;
+ 
+ 	/* Figure out the start, end and step */
+-	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
++	inc = tbl->it_offset + index / sizeof(u64);
+ 	start |= (inc << shift);
+-	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
++	inc = tbl->it_offset + (index + npages - 1) / sizeof(u64);
+ 	end |= (inc << shift);
+ 	inc = (0x1ull << shift);
+ 	mb();
+@@ -1120,19 +1152,35 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
+ 	}
+ }
+ 
+-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
+-				 __be64 *startp, __be64 *endp, bool rm)
++static int pnv_ioda2_tce_build_vm(struct iommu_table *tbl, long index,
++		long npages, unsigned long uaddr,
++		enum dma_data_direction direction,
++		struct dma_attrs *attrs)
+ {
+-	struct pnv_ioda_pe *pe = container_of(tbl->it_iommu, struct pnv_ioda_pe,
+-					      iommu);
+-	struct pnv_phb *phb = pe->phb;
+-
+-	if (phb->type == PNV_PHB_IODA1)
+-		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
+-	else
+-		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
++	long ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
++			attrs);
++
++	if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
++		pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
++
++	return ret;
+ }
+ 
++static void pnv_ioda2_tce_free_vm(struct iommu_table *tbl, long index,
++		long npages)
++{
++	pnv_tce_free(tbl, index, npages);
++
++	if (tbl->it_type & TCE_PCI_SWINV_FREE)
++		pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
++}
++
++static struct iommu_table_ops pnv_ioda2_iommu_ops = {
++	.set = pnv_ioda2_tce_build_vm,
++	.clear = pnv_ioda2_tce_free_vm,
++	.get = pnv_tce_get,
++};
++
+ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+ 				      struct pnv_ioda_pe *pe, unsigned int base,
+ 				      unsigned int segs)
+@@ -1212,7 +1260,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+ 				 TCE_PCI_SWINV_FREE   |
+ 				 TCE_PCI_SWINV_PAIR);
+ 	}
+-	tbl->it_ops = &pnv_iommu_ops;
++	tbl->it_ops = &pnv_ioda1_iommu_ops;
+ 	iommu_init_table(tbl, phb->hose->node);
+ 	iommu_register_group(&pe->iommu, phb->hose->global_number,
+ 			pe->pe_number);
+@@ -1363,7 +1411,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ 				8);
+ 		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+ 	}
+-	tbl->it_ops = &pnv_iommu_ops;
++	tbl->it_ops = &pnv_ioda2_iommu_ops;
+ 	iommu_init_table(tbl, phb->hose->node);
+ 	pe->iommu.ops = &pnv_pci_ioda2_ops;
+ 	iommu_register_group(&pe->iommu, phb->hose->global_number,
+diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+index e8af682..27ddaca 100644
+--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
++++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+@@ -83,11 +83,17 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
+ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
+ #endif /* CONFIG_PCI_MSI */
+ 
++static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
++	.set = pnv_tce_build,
++	.clear = pnv_tce_free,
++	.get = pnv_tce_get,
++};
++
+ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
+ 					 struct pci_dev *pdev)
+ {
+ 	if (phb->p5ioc2.iommu.tables[0].it_map == NULL) {
+-		phb->p5ioc2.iommu.tables[0].it_ops = &pnv_iommu_ops;
++		phb->p5ioc2.iommu.tables[0].it_ops = &pnv_p5ioc2_iommu_ops;
+ 		iommu_init_table(&phb->p5ioc2.iommu.tables[0], phb->hose->node);
+ 		iommu_register_group(&phb->p5ioc2.iommu,
+ 				pci_domain_nr(phb->hose->bus), phb->opal_id);
 diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
-index c619ec6..1c31ac8 100644
+index e6f2c43..3ab69e2 100644
 --- a/arch/powerpc/platforms/powernv/pci.c
 +++ b/arch/powerpc/platforms/powernv/pci.c
-@@ -680,7 +680,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
- 		       hose->dn->full_name);
- 		return NULL;
- 	}
--	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
-+	tbl = iommu_table_alloc(hose->node);
- 	if (WARN_ON(!tbl))
+@@ -602,70 +602,48 @@ static unsigned long pnv_dmadir_to_flags(enum dma_data_direction direction)
+ 	}
+ }
+ 
+-static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+-			 unsigned long uaddr, enum dma_data_direction direction,
+-			 struct dma_attrs *attrs, bool rm)
++static __be64 *pnv_tce(struct iommu_table *tbl, long index)
++{
++	__be64 *tmp = ((__be64 *)tbl->it_base);
++
++	return tmp + index;
++}
++
++int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
++		unsigned long uaddr, enum dma_data_direction direction,
++		struct dma_attrs *attrs)
+ {
+ 	u64 proto_tce = pnv_dmadir_to_flags(direction);
+-	__be64 *tcep, *tces;
+-	u64 rpn;
++	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
++	long i;
+ 
+-	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
+-	rpn = __pa(uaddr) >> tbl->it_page_shift;
++	for (i = 0; i < npages; i++) {
++		unsigned long newtce = proto_tce |
++				((rpn + i) << tbl->it_page_shift);
++		unsigned long idx = index - tbl->it_offset + i;
+ 
+-	while (npages--)
+-		*(tcep++) = cpu_to_be64(proto_tce |
+-				(rpn++ << tbl->it_page_shift));
+-
+-	/* Some implementations won't cache invalid TCEs and thus may not
+-	 * need that flush. We'll probably turn it_type into a bit mask
+-	 * of flags if that becomes the case
+-	 */
+-	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
+-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
++		*(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
++	}
+ 
+ 	return 0;
+ }
+ 
+-static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
+-			    unsigned long uaddr,
+-			    enum dma_data_direction direction,
+-			    struct dma_attrs *attrs)
++void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+ {
+-	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
+-			false);
+-}
+-
+-static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
+-		bool rm)
+-{
+-	__be64 *tcep, *tces;
+-
+-	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
++	long i;
+ 
+-	while (npages--)
+-		*(tcep++) = cpu_to_be64(0);
++	for (i = 0; i < npages; i++) {
++		unsigned long idx = index - tbl->it_offset + i;
+ 
+-	if (tbl->it_type & TCE_PCI_SWINV_FREE)
+-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
++		*(pnv_tce(tbl, idx)) = cpu_to_be64(0);
++	}
+ }
+ 
+-static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
++unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+ {
+-	pnv_tce_free(tbl, index, npages, false);
++	return *(pnv_tce(tbl, index));
+ }
+ 
+-static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+-{
+-	return ((u64 *)tbl->it_base)[index - tbl->it_offset];
+-}
+-
+-struct iommu_table_ops pnv_iommu_ops = {
+-	.set = pnv_tce_build_vm,
+-	.clear = pnv_tce_free_vm,
+-	.get = pnv_tce_get,
+-};
+-
+ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+ 			       void *tce_mem, u64 tce_size,
+ 			       u64 dma_offset, unsigned page_shift)
+@@ -698,7 +676,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
  		return NULL;
  	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
-diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
-index 48d1fde..41a8b14 100644
---- a/arch/powerpc/platforms/pseries/iommu.c
-+++ b/arch/powerpc/platforms/pseries/iommu.c
-@@ -617,8 +617,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
- 	pci->phb->dma_window_size = 0x8000000ul;
- 	pci->phb->dma_window_base_cur = 0x8000000ul;
- 
--	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
--			   pci->phb->node);
-+	tbl = iommu_table_alloc(pci->phb->node);
- 
- 	iommu_table_setparms(pci->phb, dn, tbl);
- 	tbl->it_ops = &iommu_table_pseries_ops;
-@@ -669,8 +668,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
- 		 pdn->full_name, ppci->iommu_table);
- 
- 	if (!ppci->iommu_table) {
--		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
--				   ppci->phb->node);
-+		tbl = iommu_table_alloc(ppci->phb->node);
- 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
- 		tbl->it_ops = &iommu_table_lpar_multi_ops;
- 		ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
-@@ -697,8 +695,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
- 		struct pci_controller *phb = PCI_DN(dn)->phb;
- 
- 		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
--		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
--				   phb->node);
-+		tbl = iommu_table_alloc(phb->node);
- 		iommu_table_setparms(phb, dn, tbl);
- 		tbl->it_ops = &iommu_table_pseries_ops;
- 		PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
-@@ -1120,8 +1117,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
- 
- 	pci = PCI_DN(pdn);
- 	if (!pci->iommu_table) {
--		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
--				   pci->phb->node);
-+		tbl = iommu_table_alloc(pci->phb->node);
- 		iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
- 		tbl->it_ops = &iommu_table_lpar_multi_ops;
- 		pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
+ 				  be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K);
+-	tbl->it_ops = &pnv_iommu_ops;
++	tbl->it_ops = &pnv_ioda1_iommu_ops;
+ 	iommu_init_table(tbl, hose->node);
+ 	iommu_register_group(tbl->it_iommu, pci_domain_nr(hose->bus), 0);
+ 
+diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
+index 19f3985..724bce9 100644
+--- a/arch/powerpc/platforms/powernv/pci.h
++++ b/arch/powerpc/platforms/powernv/pci.h
+@@ -216,7 +216,12 @@ extern struct pci_ops pnv_pci_ops;
+ #ifdef CONFIG_EEH
+ extern struct pnv_eeh_ops ioda_eeh_ops;
+ #endif
+-extern struct iommu_table_ops pnv_iommu_ops;
++extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
++		unsigned long uaddr, enum dma_data_direction direction,
++		struct dma_attrs *attrs);
++extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
++extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
++extern struct iommu_table_ops pnv_ioda1_iommu_ops;
+ 
+ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+ 				unsigned char *log_buff);
 -- 
 2.0.0
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help