Inter-revision diff: patch 2

Comparing v11 (message) to v1 (message)

--- v11
+++ v1
@@ -1,172 +1,217 @@
-The set_iommu_table_base_and_group() name suggests that the function
-sets the table base and adds a device to an IOMMU group.
-
-The actual purpose for table base setting is to put some reference
-into a device so later iommu_add_device() can get the IOMMU group
-reference and add the device to the group.
-
-At the moment a group cannot be explicitly passed to iommu_add_device()
-as we want it to work from the bus notifier; we can fix it later and
-remove the confusing calls to set_iommu_table_base().
-
-This replaces set_iommu_table_base_and_group() with a pair of
-set_iommu_table_base() + iommu_add_device() calls, which makes the code
-easier to read.
-
-This adds a few comments explaining why set_iommu_table_base() and
-iommu_add_device() are called where they are called.
-
-For IODA1/2, this essentially removes iommu_add_device() call from
-the pnv_pci_ioda_dma_dev_setup() as it will always fail at this particular
-place:
-- for physical PE, the device is already attached by iommu_add_device()
-in pnv_pci_ioda_setup_dma_pe();
-- for virtual PE, the sysfs entries are not ready to create all symlinks
-so the actual adding happens in tce_iommu_bus_notifier.
+At the moment pnv_pci_ioda_tce_invalidate() gets the PE pointer via
+container_of(tbl). Since we are going to have to add Dynamic DMA windows
+and that means having 2 IOMMU tables per PE, this is not going to work.
+
+This implements pnv_pci_ioda(1|2)_tce_invalidate as a pnv_ioda_pe callback.
+
+This adds a pnv_iommu_table wrapper around iommu_table and stores a pointer
+to PE there. PNV's ppc_md.tce_build() call uses this to find PE and
+do the invalidation. This will be used later for Dynamic DMA windows too.
+
+This registers invalidate() callbacks for IODA1 and IODA2:
+- pnv_pci_ioda1_tce_invalidate;
+- pnv_pci_ioda2_tce_invalidate.
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
-Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
 ---
-Changes:
-v10:
-* new to the series
----
- arch/powerpc/include/asm/iommu.h            |  7 -------
- arch/powerpc/platforms/powernv/pci-ioda.c   | 27 +++++++++++++++++++++++----
- arch/powerpc/platforms/powernv/pci-p5ioc2.c |  3 ++-
- arch/powerpc/platforms/pseries/iommu.c      | 15 ++++++++-------
- 4 files changed, 33 insertions(+), 19 deletions(-)
-
-diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index 1e27d63..8353c86 100644
---- a/arch/powerpc/include/asm/iommu.h
-+++ b/arch/powerpc/include/asm/iommu.h
-@@ -140,13 +140,6 @@ static inline int __init tce_iommu_bus_notifier_init(void)
- }
- #endif /* !CONFIG_IOMMU_API */
- 
--static inline void set_iommu_table_base_and_group(struct device *dev,
--						  void *base)
+ arch/powerpc/platforms/powernv/pci-ioda.c | 35 ++++++++++++-------------------
+ arch/powerpc/platforms/powernv/pci.c      | 31 ++++++++++++++++++++-------
+ arch/powerpc/platforms/powernv/pci.h      | 13 +++++++++++-
+ 3 files changed, 48 insertions(+), 31 deletions(-)
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index df241b1..136e765 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -857,7 +857,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
+ 
+ 	pe = &phb->ioda.pe_array[pdn->pe_number];
+ 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
+-	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
++	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32.table);
+ }
+ 
+ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
+@@ -884,7 +884,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
+ 	} else {
+ 		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+ 		set_dma_ops(&pdev->dev, &dma_iommu_ops);
+-		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
++		set_iommu_table_base(&pdev->dev, &pe->tce32.table);
+ 	}
+ 	*pdev->dev.dma_mask = dma_mask;
+ 	return 0;
+@@ -899,9 +899,9 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
+ 	list_for_each_entry(dev, &bus->devices, bus_list) {
+ 		if (add_to_iommu_group)
+ 			set_iommu_table_base_and_group(&dev->dev,
+-						       &pe->tce32_table);
++						       &pe->tce32.table);
+ 		else
+-			set_iommu_table_base(&dev->dev, &pe->tce32_table);
++			set_iommu_table_base(&dev->dev, &pe->tce32.table);
+ 
+ 		if (dev->subordinate)
+ 			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+@@ -988,19 +988,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
+ 	}
+ }
+ 
+-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
+-				 __be64 *startp, __be64 *endp, bool rm)
 -{
--	set_iommu_table_base(dev, base);
--	iommu_add_device(dev);
+-	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+-					      tce32_table);
+-	struct pnv_phb *phb = pe->phb;
+-
+-	if (phb->type == PNV_PHB_IODA1)
+-		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
+-	else
+-		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
 -}
 -
- extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
- 			    struct scatterlist *sglist, int nelems,
- 			    unsigned long mask,
-diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index 2f092bb..9a77f3c 100644
---- a/arch/powerpc/platforms/powernv/pci-ioda.c
-+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -1598,7 +1598,13 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
- 
- 	pe = &phb->ioda.pe_array[pdn->pe_number];
- 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
--	set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
-+	set_iommu_table_base(&pdev->dev, pe->tce32_table);
+ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+ 				      struct pnv_ioda_pe *pe, unsigned int base,
+ 				      unsigned int segs)
+@@ -1058,9 +1045,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+ 	}
+ 
+ 	/* Setup linux iommu table */
+-	tbl = &pe->tce32_table;
++	tbl = &pe->tce32.table;
+ 	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
+ 				  base << 28, IOMMU_PAGE_SHIFT_4K);
++	pe->tce32.pe = pe;
++	pe->tce32.invalidate_fn = pnv_pci_ioda1_tce_invalidate;
+ 
+ 	/* OPAL variant of P7IOC SW invalidated TCEs */
+ 	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+@@ -1097,7 +1086,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+ {
+ 	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+-					      tce32_table);
++					      tce32.table);
+ 	uint16_t window_id = (pe->pe_number << 1 ) + 1;
+ 	int64_t rc;
+ 
+@@ -1142,10 +1131,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
+ 	pe->tce_bypass_base = 1ull << 59;
+ 
+ 	/* Install set_bypass callback for VFIO */
+-	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
++	pe->tce32.table.set_bypass = pnv_pci_ioda2_set_bypass;
+ 
+ 	/* Enable bypass by default */
+-	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
++	pnv_pci_ioda2_set_bypass(&pe->tce32.table, true);
+ }
+ 
+ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+@@ -1193,9 +1182,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ 	}
+ 
+ 	/* Setup linux iommu table */
+-	tbl = &pe->tce32_table;
++	tbl = &pe->tce32.table;
+ 	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
+ 			IOMMU_PAGE_SHIFT_4K);
++	pe->tce32.pe = pe;
++	pe->tce32.invalidate_fn = pnv_pci_ioda2_tce_invalidate;
+ 
+ 	/* OPAL variant of PHB3 invalidated TCEs */
+ 	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
+index b854b57..97895d4 100644
+--- a/arch/powerpc/platforms/powernv/pci.c
++++ b/arch/powerpc/platforms/powernv/pci.c
+@@ -599,6 +599,27 @@ struct pci_ops pnv_pci_ops = {
+ 	.write = pnv_pci_write_config,
+ };
+ 
++static void pnv_tce_invalidate(struct iommu_table *tbl, __be64 *startp,
++	__be64 *endp, bool rm)
++{
++	struct pnv_iommu_table *ptbl = container_of(tbl,
++			struct pnv_iommu_table, table);
++	struct pnv_ioda_pe *pe = ptbl->pe;
++
 +	/*
-+	 * Note: iommu_add_device() will fail here as
-+	 * for physical PE: the device is already added by now;
-+	 * for virtual PE: sysfs entries are not ready yet and
-+	 * tce_iommu_bus_notifier will add the device to a group later.
++	 * Some implementations won't cache invalid TCEs and thus may not
++	 * need that flush. We'll probably turn it_type into a bit mask
++	 * of flags if that becomes the case
 +	 */
- }
- 
- static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
-@@ -1659,7 +1665,8 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- 	struct pci_dev *dev;
- 
- 	list_for_each_entry(dev, &bus->devices, bus_list) {
--		set_iommu_table_base_and_group(&dev->dev, pe->tce32_table);
-+		set_iommu_table_base(&dev->dev, pe->tce32_table);
-+		iommu_add_device(&dev->dev);
- 
- 		if (dev->subordinate)
- 			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
-@@ -1835,7 +1842,13 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
- 	if (pe->flags & PNV_IODA_PE_DEV) {
- 		iommu_register_group(tbl, phb->hose->global_number,
- 				     pe->pe_number);
--		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-+		/*
-+		 * Setting table base here only for carrying iommu_group
-+		 * further down to let iommu_add_device() do the job.
-+		 * pnv_pci_ioda_dma_dev_setup will override it later anyway.
-+		 */
-+		set_iommu_table_base(&pe->pdev->dev, tbl);
-+		iommu_add_device(&pe->pdev->dev);
- 	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- 		iommu_register_group(tbl, phb->hose->global_number,
- 				     pe->pe_number);
-@@ -1963,7 +1976,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- 	if (pe->flags & PNV_IODA_PE_DEV) {
- 		iommu_register_group(tbl, phb->hose->global_number,
- 				     pe->pe_number);
--		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-+		/*
-+		 * Setting table base here only for carrying iommu_group
-+		 * further down to let iommu_add_device() do the job.
-+		 * pnv_pci_ioda_dma_dev_setup will override it later anyway.
-+		 */
-+		set_iommu_table_base(&pe->pdev->dev, tbl);
-+		iommu_add_device(&pe->pdev->dev);
- 	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- 		iommu_register_group(tbl, phb->hose->global_number,
- 				     pe->pe_number);
-diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
-index 4729ca7..b17d93615 100644
---- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
-+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
-@@ -92,7 +92,8 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
- 				pci_domain_nr(phb->hose->bus), phb->opal_id);
- 	}
- 
--	set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
-+	set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
-+	iommu_add_device(&pdev->dev);
- }
- 
- static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
-diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
-index 61d5a17..05ab06d 100644
---- a/arch/powerpc/platforms/pseries/iommu.c
-+++ b/arch/powerpc/platforms/pseries/iommu.c
-@@ -688,8 +688,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
- 		iommu_table_setparms(phb, dn, tbl);
- 		PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
- 		iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
--		set_iommu_table_base_and_group(&dev->dev,
--					       PCI_DN(dn)->iommu_table);
-+		set_iommu_table_base(&dev->dev, tbl);
-+		iommu_add_device(&dev->dev);
- 		return;
- 	}
- 
-@@ -700,10 +700,10 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
- 	while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL)
- 		dn = dn->parent;
- 
--	if (dn && PCI_DN(dn))
--		set_iommu_table_base_and_group(&dev->dev,
--					       PCI_DN(dn)->iommu_table);
--	else
-+	if (dn && PCI_DN(dn)) {
-+		set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
-+		iommu_add_device(&dev->dev);
-+	} else
- 		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
- 		       pci_name(dev));
- }
-@@ -1115,7 +1115,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
- 		pr_debug("  found DMA window, table: %p\n", pci->iommu_table);
- 	}
- 
--	set_iommu_table_base_and_group(&dev->dev, pci->iommu_table);
-+	set_iommu_table_base(&dev->dev, pci->iommu_table);
-+	iommu_add_device(&dev->dev);
- }
- 
- static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
++	if (!(tbl->it_type & TCE_PCI_SWINV_FREE))
++		return;
++
++	if (!pe || !ptbl->invalidate_fn)
++		return;
++
++	ptbl->invalidate_fn(pe, tbl, startp, endp, rm);
++}
++
+ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ 			 unsigned long uaddr, enum dma_data_direction direction,
+ 			 struct dma_attrs *attrs, bool rm)
+@@ -619,12 +640,7 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ 		*(tcep++) = cpu_to_be64(proto_tce |
+ 				(rpn++ << tbl->it_page_shift));
+ 
+-	/* Some implementations won't cache invalid TCEs and thus may not
+-	 * need that flush. We'll probably turn it_type into a bit mask
+-	 * of flags if that becomes the case
+-	 */
+-	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
+-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
++	pnv_tce_invalidate(tbl, tces, tcep - 1, rm);
+ 
+ 	return 0;
+ }
+@@ -648,8 +664,7 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
+ 	while (npages--)
+ 		*(tcep++) = cpu_to_be64(0);
+ 
+-	if (tbl->it_type & TCE_PCI_SWINV_FREE)
+-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
++	pnv_tce_invalidate(tbl, tces, tcep - 1, rm);
+ }
+ 
+ static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
+diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
+index 48494d4..095db43 100644
+--- a/arch/powerpc/platforms/powernv/pci.h
++++ b/arch/powerpc/platforms/powernv/pci.h
+@@ -24,6 +24,17 @@ enum pnv_phb_model {
+ #define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
+ #define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
+ 
++struct pnv_ioda_pe;
++typedef void (*pnv_invalidate_fn)(struct pnv_ioda_pe *pe,
++		struct iommu_table *tbl,
++		__be64 *startp, __be64 *endp, bool rm);
++
++struct pnv_iommu_table {
++	struct iommu_table	table;
++	struct pnv_ioda_pe	*pe;
++	pnv_invalidate_fn	invalidate_fn;
++};
++
+ /* Data associated with a PE, including IOMMU tracking etc.. */
+ struct pnv_phb;
+ struct pnv_ioda_pe {
+@@ -53,7 +64,7 @@ struct pnv_ioda_pe {
+ 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
+ 	int			tce32_seg;
+ 	int			tce32_segcount;
+-	struct iommu_table	tce32_table;
++	struct pnv_iommu_table	tce32;
+ 	phys_addr_t		tce_inval_reg_phys;
+ 
+ 	/* 64-bit TCE bypass region */
 -- 
-2.4.0.rc3.8.gfb3e7d5
+2.0.0
+
+_______________________________________________
+Linuxppc-dev mailing list
+Linuxppc-dev@lists.ozlabs.org
+https://lists.ozlabs.org/listinfo/linuxppc-dev
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help