--- v10
+++ v4
@@ -1,82 +1,98 @@
-Normally a bitmap from the iommu_table is used to track what TCE entry
-is in use. Since we are going to use iommu_table without its locks and
-do xchg() instead, it becomes essential not to put bits which are not
-implied in the direction flag as the old TCE value (more precisely -
-the permission bits) will be used to decide whether to put the page or not.
+This adds missing locks in iommu_take_ownership()/
+iommu_release_ownership().
-This adds iommu_direction_to_tce_perm() (its counterpart is there already)
-and uses it for powernv's pnv_tce_build().
+This marks all pages busy in iommu_table::it_map in order to catch
+errors if there is an attempt to use this table while ownership over it
+is taken.
+
+This only clears TCE content if there is no page marked busy in it_map.
+Clearing must be done outside of the table locks as iommu_clear_tce()
+called from iommu_clear_tces_and_put_pages() does this.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
-Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
+Note: we might want to get rid of it as this patchset removes it_map
+from tables passed to VFIO.
+
Changes:
-v9:
-* added comment why we must put only valid permission bits
+v5:
+* do not store bit#0 value, it has to be set for zero-based table
+anyway
+* removed test_and_clear_bit
+* only disable bypass if succeeded
---
- arch/powerpc/include/asm/iommu.h | 1 +
- arch/powerpc/kernel/iommu.c | 15 +++++++++++++++
- arch/powerpc/platforms/powernv/pci.c | 7 +------
- 3 files changed, 17 insertions(+), 6 deletions(-)
+ arch/powerpc/kernel/iommu.c | 31 +++++++++++++++++++++++++------
+ 1 file changed, 25 insertions(+), 6 deletions(-)
-diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index e94a5e3..d91bd69 100644
---- a/arch/powerpc/include/asm/iommu.h
-+++ b/arch/powerpc/include/asm/iommu.h
-@@ -200,6 +200,7 @@ extern int iommu_take_ownership(struct iommu_table *tbl);
- extern void iommu_release_ownership(struct iommu_table *tbl);
-
- extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
-+extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
-
- #endif /* __KERNEL__ */
- #endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
-index 8673c94..31319f8 100644
+index 952939f..407d0d6 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
-@@ -863,6 +863,21 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+@@ -1024,33 +1024,48 @@ EXPORT_SYMBOL_GPL(iommu_tce_build);
+
+ int iommu_take_ownership(struct iommu_table *tbl)
+ {
+- unsigned long sz = (tbl->it_size + 7) >> 3;
++ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
++ int ret = 0;
++
++ spin_lock_irqsave(&tbl->large_pool.lock, flags);
++ for (i = 0; i < tbl->nr_pools; i++)
++ spin_lock(&tbl->pools[i].lock);
+
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
+ pr_err("iommu_tce: it_map is not empty");
+- return -EBUSY;
++ ret = -EBUSY;
++ if (tbl->it_offset == 0)
++ set_bit(0, tbl->it_map);
++ } else {
++ memset(tbl->it_map, 0xff, sz);
}
+
+- memset(tbl->it_map, 0xff, sz);
++ for (i = 0; i < tbl->nr_pools; i++)
++ spin_unlock(&tbl->pools[i].lock);
++ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+
+ /*
+ * Disable iommu bypass, otherwise the user can DMA to all of
+ * our physical memory via the bypass window instead of just
+ * the pages that has been explicitly mapped into the iommu
+ */
+- if (tbl->set_bypass)
++ if (!ret && tbl->set_bypass)
+ tbl->set_bypass(tbl, false);
+
+- return 0;
++ return ret;
}
+ EXPORT_SYMBOL_GPL(iommu_take_ownership);
-+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
-+{
-+ switch (dir) {
-+ case DMA_BIDIRECTIONAL:
-+ return TCE_PCI_READ | TCE_PCI_WRITE;
-+ case DMA_FROM_DEVICE:
-+ return TCE_PCI_WRITE;
-+ case DMA_TO_DEVICE:
-+ return TCE_PCI_READ;
-+ default:
-+ return 0;
-+ }
-+}
-+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+ void iommu_release_ownership(struct iommu_table *tbl)
+ {
+- unsigned long sz = (tbl->it_size + 7) >> 3;
++ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+
- #ifdef CONFIG_IOMMU_API
- /*
- * SPAPR TCE API
-diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
-index bca2aeb..b7ea245 100644
---- a/arch/powerpc/platforms/powernv/pci.c
-+++ b/arch/powerpc/platforms/powernv/pci.c
-@@ -576,15 +576,10 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr, enum dma_data_direction direction,
- struct dma_attrs *attrs, bool rm)
- {
-- u64 proto_tce;
-+ u64 proto_tce = iommu_direction_to_tce_perm(direction);
- __be64 *tcep, *tces;
- u64 rpn;
++ spin_lock_irqsave(&tbl->large_pool.lock, flags);
++ for (i = 0; i < tbl->nr_pools; i++)
++ spin_lock(&tbl->pools[i].lock);
-- proto_tce = TCE_PCI_READ; // Read allowed
--
-- if (direction != DMA_TO_DEVICE)
-- proto_tce |= TCE_PCI_WRITE;
--
- tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
- rpn = __pa(uaddr) >> tbl->it_page_shift;
+ memset(tbl->it_map, 0, sz);
+@@ -1058,6 +1073,10 @@ void iommu_release_ownership(struct iommu_table *tbl)
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
++ for (i = 0; i < tbl->nr_pools; i++)
++ spin_unlock(&tbl->pools[i].lock);
++ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
++
+ /* The kernel owns the device now, we can restore the iommu bypass */
+ if (tbl->set_bypass)
+ tbl->set_bypass(tbl, true);
--
-2.4.0.rc3.8.gfb3e7d5
+2.0.0