Inter-revision diff: patch 13

Comparing v10 (message) to v4 (message)

--- v10
+++ v4
@@ -1,82 +1,98 @@
-Normally a bitmap from the iommu_table is used to track what TCE entry
-is in use. Since we are going to use iommu_table without its locks and
-do xchg() instead, it becomes essential not to put bits which are not
-implied in the direction flag as the old TCE value (more precisely -
-the permission bits) will be used to decide whether to put the page or not.
+This adds missing locks in iommu_take_ownership()/
+iommu_release_ownership().
 
-This adds iommu_direction_to_tce_perm() (its counterpart is there already)
-and uses it for powernv's pnv_tce_build().
+This marks all pages busy in iommu_table::it_map in order to catch
+errors if there is an attempt to use this table while ownership over it
+is taken.
+
+This only clears TCE content if there is no page marked busy in it_map.
+Clearing must be done outside of the table locks as iommu_clear_tce()
+called from iommu_clear_tces_and_put_pages() does this.
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
-Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
 ---
+Note: we might want to get rid of it as this patchset removes it_map
+from tables passed to VFIO.
+
 Changes:
-v9:
-* added comment why we must put only valid permission bits
+v5:
+* do not store bit#0 value, it has to be set for zero-based table
+anyway
+* removed test_and_clear_bit
+* only disable bypass if succeeded
 ---
- arch/powerpc/include/asm/iommu.h     |  1 +
- arch/powerpc/kernel/iommu.c          | 15 +++++++++++++++
- arch/powerpc/platforms/powernv/pci.c |  7 +------
- 3 files changed, 17 insertions(+), 6 deletions(-)
+ arch/powerpc/kernel/iommu.c | 31 +++++++++++++++++++++++++------
+ 1 file changed, 25 insertions(+), 6 deletions(-)
 
-diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index e94a5e3..d91bd69 100644
---- a/arch/powerpc/include/asm/iommu.h
-+++ b/arch/powerpc/include/asm/iommu.h
-@@ -200,6 +200,7 @@ extern int iommu_take_ownership(struct iommu_table *tbl);
- extern void iommu_release_ownership(struct iommu_table *tbl);
- 
- extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
-+extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
- 
- #endif /* __KERNEL__ */
- #endif /* _ASM_IOMMU_H */
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
-index 8673c94..31319f8 100644
+index 952939f..407d0d6 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
-@@ -863,6 +863,21 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+@@ -1024,33 +1024,48 @@ EXPORT_SYMBOL_GPL(iommu_tce_build);
+ 
+ int iommu_take_ownership(struct iommu_table *tbl)
+ {
+-	unsigned long sz = (tbl->it_size + 7) >> 3;
++	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
++	int ret = 0;
++
++	spin_lock_irqsave(&tbl->large_pool.lock, flags);
++	for (i = 0; i < tbl->nr_pools; i++)
++		spin_lock(&tbl->pools[i].lock);
+ 
+ 	if (tbl->it_offset == 0)
+ 		clear_bit(0, tbl->it_map);
+ 
+ 	if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
+ 		pr_err("iommu_tce: it_map is not empty");
+-		return -EBUSY;
++		ret = -EBUSY;
++		if (tbl->it_offset == 0)
++			set_bit(0, tbl->it_map);
++	} else {
++		memset(tbl->it_map, 0xff, sz);
  	}
+ 
+-	memset(tbl->it_map, 0xff, sz);
++	for (i = 0; i < tbl->nr_pools; i++)
++		spin_unlock(&tbl->pools[i].lock);
++	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+ 
+ 	/*
+ 	 * Disable iommu bypass, otherwise the user can DMA to all of
+ 	 * our physical memory via the bypass window instead of just
+ 	 * the pages that has been explicitly mapped into the iommu
+ 	 */
+-	if (tbl->set_bypass)
++	if (!ret && tbl->set_bypass)
+ 		tbl->set_bypass(tbl, false);
+ 
+-	return 0;
++	return ret;
  }
+ EXPORT_SYMBOL_GPL(iommu_take_ownership);
  
-+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
-+{
-+	switch (dir) {
-+	case DMA_BIDIRECTIONAL:
-+		return TCE_PCI_READ | TCE_PCI_WRITE;
-+	case DMA_FROM_DEVICE:
-+		return TCE_PCI_WRITE;
-+	case DMA_TO_DEVICE:
-+		return TCE_PCI_READ;
-+	default:
-+		return 0;
-+	}
-+}
-+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+ void iommu_release_ownership(struct iommu_table *tbl)
+ {
+-	unsigned long sz = (tbl->it_size + 7) >> 3;
++	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
 +
- #ifdef CONFIG_IOMMU_API
- /*
-  * SPAPR TCE API
-diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
-index bca2aeb..b7ea245 100644
---- a/arch/powerpc/platforms/powernv/pci.c
-+++ b/arch/powerpc/platforms/powernv/pci.c
-@@ -576,15 +576,10 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- 			 unsigned long uaddr, enum dma_data_direction direction,
- 			 struct dma_attrs *attrs, bool rm)
- {
--	u64 proto_tce;
-+	u64 proto_tce = iommu_direction_to_tce_perm(direction);
- 	__be64 *tcep, *tces;
- 	u64 rpn;
++	spin_lock_irqsave(&tbl->large_pool.lock, flags);
++	for (i = 0; i < tbl->nr_pools; i++)
++		spin_lock(&tbl->pools[i].lock);
  
--	proto_tce = TCE_PCI_READ; // Read allowed
--
--	if (direction != DMA_TO_DEVICE)
--		proto_tce |= TCE_PCI_WRITE;
--
- 	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
- 	rpn = __pa(uaddr) >> tbl->it_page_shift;
+ 	memset(tbl->it_map, 0, sz);
  
+@@ -1058,6 +1073,10 @@ void iommu_release_ownership(struct iommu_table *tbl)
+ 	if (tbl->it_offset == 0)
+ 		set_bit(0, tbl->it_map);
+ 
++	for (i = 0; i < tbl->nr_pools; i++)
++		spin_unlock(&tbl->pools[i].lock);
++	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
++
+ 	/* The kernel owns the device now, we can restore the iommu bypass */
+ 	if (tbl->set_bypass)
+ 		tbl->set_bypass(tbl, true);
 -- 
-2.4.0.rc3.8.gfb3e7d5
+2.0.0
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help