Inter-revision diff: patch 26

Comparing v12 (message) to v7 (message)

--- v12
+++ v7
@@ -1,100 +1,121 @@
-This is a part of moving DMA window programming to an iommu_ops
-callback. pnv_pci_ioda2_set_window() takes an iommu_table_group as
-a first parameter (not pnv_ioda_pe) as it is going to be used as
-a callback for VFIO DDW code.
+In order to support memory pre-registration, we need a way to track
+the use of every registered memory region and only allow unregistration
+if a region is not in use anymore. So we need a way to tell which
+region the just-cleared TCE came from.
 
-This should cause no behavioural change.
+This adds a userspace view of the TCE table to the iommu_table struct.
+It contains userspace addresses, one per TCE entry. The table is only
+allocated when ownership of an IOMMU group is taken, which means
+it is only used from outside of the powernv code (such as VFIO).
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
-Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
-Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
 ---
-Changes:
-v12:
-* removed comment from commit log about pnv_pci_ioda2_tvt_invalidate()/
-pnv_pci_ioda2_invalidate_entire()
+ arch/powerpc/include/asm/iommu.h          |  6 ++++++
+ arch/powerpc/kernel/iommu.c               |  7 +++++++
+ arch/powerpc/platforms/powernv/pci-ioda.c | 23 ++++++++++++++++++++++-
+ 3 files changed, 35 insertions(+), 1 deletion(-)
 
-v11:
-* replaced some 1<<it_page_shift with IOMMU_PAGE_SIZE() macro
-
-v9:
-* initialize pe->table_group.tables[0] at the very end when
-tbl is fully initialized
-* moved pnv_pci_ioda2_tvt_invalidate() from earlier patch
----
- arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++++++++++++++++++++------
- 1 file changed, 38 insertions(+), 9 deletions(-)
-
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 2c08c91..a768a4d 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -106,9 +106,15 @@ struct iommu_table {
+ 	unsigned long *it_map;       /* A simple allocation bitmap for now */
+ 	unsigned long  it_page_shift;/* table iommu page size */
+ 	struct iommu_table_group *it_group;
++	unsigned long *it_userspace; /* userspace view of the table */
+ 	struct iommu_table_ops *it_ops;
+ };
+ 
++#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
++		((tbl)->it_userspace ? \
++			&((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
++			NULL)
++
+ /* Pure 2^n version of get_order */
+ static inline __attribute_const__
+ int get_iommu_order(unsigned long size, struct iommu_table *tbl)
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index 0bcd988..82102d1 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -38,6 +38,7 @@
+ #include <linux/pci.h>
+ #include <linux/iommu.h>
+ #include <linux/sched.h>
++#include <linux/vmalloc.h>
+ #include <asm/io.h>
+ #include <asm/prom.h>
+ #include <asm/iommu.h>
+@@ -1069,6 +1070,9 @@ static int iommu_table_take_ownership(struct iommu_table *tbl)
+ 		spin_unlock(&tbl->pools[i].lock);
+ 	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+ 
++	BUG_ON(tbl->it_userspace);
++	tbl->it_userspace = vzalloc(sizeof(*tbl->it_userspace) * tbl->it_size);
++
+ 	return 0;
+ }
+ 
+@@ -1102,6 +1106,9 @@ static void iommu_table_release_ownership(struct iommu_table *tbl)
+ {
+ 	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+ 
++	vfree(tbl->it_userspace);
++	tbl->it_userspace = NULL;
++
+ 	spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ 	for (i = 0; i < tbl->nr_pools; i++)
+ 		spin_lock(&tbl->pools[i].lock);
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index 38d53dc..da14043 100644
+index bc36cf1..036f3c1 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -1969,6 +1969,43 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
- 	}
+@@ -26,6 +26,7 @@
+ #include <linux/iommu.h>
+ #include <linux/mmzone.h>
+ #include <linux/sizes.h>
++#include <linux/vmalloc.h>
+ 
+ #include <asm/mmzone.h>
+ #include <asm/sections.h>
+@@ -1469,6 +1470,9 @@ static void pnv_pci_free_table(struct iommu_table *tbl)
+ 	if (!tbl->it_size)
+ 		return;
+ 
++	if (tbl->it_userspace)
++		vfree(tbl->it_userspace);
++
+ 	pnv_free_tce_table(tbl->it_base, size, tbl->it_indirect_levels);
+ 	iommu_reset_table(tbl, "ioda2");
+ }
+@@ -1656,9 +1660,26 @@ static void pnv_ioda2_set_ownership(struct iommu_table_group *table_group,
+ 	pnv_pci_ioda2_set_bypass(pe, !enable);
  }
  
-+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
-+		int num, struct iommu_table *tbl)
++static long pnv_pci_ioda2_create_table_with_uas(
++		struct iommu_table_group *table_group,
++		int num, __u32 page_shift, __u64 window_size, __u32 levels,
++		struct iommu_table *tbl)
 +{
-+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
-+			table_group);
-+	struct pnv_phb *phb = pe->phb;
-+	int64_t rc;
-+	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
-+	const __u64 win_size = tbl->it_size << tbl->it_page_shift;
++	long ret = pnv_pci_ioda2_create_table(table_group, num,
++			page_shift, window_size, levels, tbl);
 +
-+	pe_info(pe, "Setting up window %llx..%llx pg=%x\n",
-+			start_addr, start_addr + win_size - 1,
-+			IOMMU_PAGE_SIZE(tbl));
++	if (ret)
++		return ret;
 +
-+	/*
-+	 * Map TCE table through TVT. The TVE index is the PE number
-+	 * shifted by 1 bit for 32-bits DMA space.
-+	 */
-+	rc = opal_pci_map_pe_dma_window(phb->opal_id,
-+			pe->pe_number,
-+			pe->pe_number << 1,
-+			1,
-+			__pa(tbl->it_base),
-+			tbl->it_size << 3,
-+			IOMMU_PAGE_SIZE(tbl));
-+	if (rc) {
-+		pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
-+		return rc;
-+	}
-+
-+	pnv_pci_link_table_and_group(phb->hose->node, num,
-+			tbl, &pe->table_group);
-+	pnv_pci_ioda2_tce_invalidate_entire(pe);
++	BUG_ON(tbl->it_userspace);
++	tbl->it_userspace = vzalloc(sizeof(*tbl->it_userspace) * tbl->it_size);
 +
 +	return 0;
 +}
 +
- static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
- {
- 	uint16_t window_id = (pe->pe_number << 1 ) + 1;
-@@ -2124,21 +2161,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- 	pe->table_group.ops = &pnv_pci_ioda2_ops;
- #endif
- 
--	/*
--	 * Map TCE table through TVT. The TVE index is the PE number
--	 * shifted by 1 bit for 32-bits DMA space.
--	 */
--	rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
--			pe->pe_number << 1, 1, __pa(tbl->it_base),
--			tbl->it_size << 3, 1ULL << tbl->it_page_shift);
-+	rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
- 	if (rc) {
- 		pe_err(pe, "Failed to configure 32-bit TCE table,"
- 		       " err %ld\n", rc);
- 		goto fail;
- 	}
- 
--	pnv_pci_ioda2_tce_invalidate_entire(pe);
--
- 	/* OPAL variant of PHB3 invalidated TCEs */
- 	if (phb->ioda.tce_inval_reg)
- 		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+ 	.set_ownership = pnv_ioda2_set_ownership,
+-	.create_table = pnv_pci_ioda2_create_table,
++	.create_table = pnv_pci_ioda2_create_table_with_uas,
+ 	.set_window = pnv_pci_ioda2_set_window,
+ 	.unset_window = pnv_pci_ioda2_unset_window,
+ };
 -- 
-2.4.0.rc3.8.gfb3e7d5
+2.0.0
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help