--- v11
+++ v4
@@ -1,100 +1,114 @@
-This is a part of moving DMA window programming to an iommu_ops
-callback. pnv_pci_ioda2_set_window() takes an iommu_table_group as
-a first parameter (not pnv_ioda_pe) as it is going to be used as
-a callback for VFIO DDW code.
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+---
+ drivers/vfio/vfio_iommu_spapr_tce.c | 62 +++++++++++++++++++++++--------------
+ 1 file changed, 38 insertions(+), 24 deletions(-)
-This adds pnv_pci_ioda2_tvt_invalidate() to invalidate TVT as it is
-a good thing to do. It does not have immediate effect now as the table
-is never recreated after reboot but it will in the following patches.
-
-This should cause no behavioural change.
-
-Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
-Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
-Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
----
-Changes:
-v11:
-* replaced some 1<<it_page_shift with IOMMU_PAGE_SIZE() macro
-
-v9:
-* initialize pe->table_group.tables[0] at the very end when
-tbl is fully initialized
-* moved pnv_pci_ioda2_tvt_invalidate() from earlier patch
----
- arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++++++++++++++++++++------
- 1 file changed, 38 insertions(+), 9 deletions(-)
-
-diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index 3d29fe3..fda01c1 100644
---- a/arch/powerpc/platforms/powernv/pci-ioda.c
-+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -1968,6 +1968,43 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
+index fdcc04c..4ff8289 100644
+--- a/drivers/vfio/vfio_iommu_spapr_tce.c
++++ b/drivers/vfio/vfio_iommu_spapr_tce.c
+@@ -435,7 +435,7 @@ static void tce_iommu_release(void *iommu_data)
+ iommu = iommu_group_get_iommudata(container->grp);
+ tbl = &iommu->tables[0];
+ tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+-
++ iommu->ops->free_table(tbl);
+ tce_iommu_detach_group(iommu_data, container->grp);
}
- }
-+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
-+ int num, struct iommu_table *tbl)
-+{
-+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
-+ table_group);
-+ struct pnv_phb *phb = pe->phb;
-+ int64_t rc;
-+ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
-+ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+@@ -796,6 +796,7 @@ static int tce_iommu_attach_group(void *iommu_data,
+ int ret = 0;
+ struct tce_container *container = iommu_data;
+ struct powerpc_iommu *iommu;
++ struct iommu_table tbltmp = { 0 }, *tbl = &tbltmp;
+
+ mutex_lock(&container->lock);
+
+@@ -806,35 +807,44 @@ static int tce_iommu_attach_group(void *iommu_data,
+ iommu_group_id(container->grp),
+ iommu_group_id(iommu_group));
+ ret = -EBUSY;
+- } else if (container->enabled) {
++ goto unlock_exit;
++ }
+
-+ pe_info(pe, "Setting up window %llx..%llx pg=%x\n",
-+ start_addr, start_addr + win_size - 1,
-+ IOMMU_PAGE_SIZE(tbl));
++ if (container->enabled) {
+ pr_err("tce_vfio: attaching group #%u to enabled container\n",
+ iommu_group_id(iommu_group));
+ ret = -EBUSY;
++ goto unlock_exit;
++ }
++
++ iommu = iommu_group_get_iommudata(iommu_group);
++ if (WARN_ON_ONCE(!iommu)) {
++ ret = -ENXIO;
++ goto unlock_exit;
++ }
+
+ /*
-+ * Map TCE table through TVT. The TVE index is the PE number
-+ * shifted by 1 bit for 32-bits DMA space.
++ * Disable iommu bypass, otherwise the user can DMA to all of
++ * our physical memory via the bypass window instead of just
+ * the pages that have been explicitly mapped into the iommu
+ */
-+ rc = opal_pci_map_pe_dma_window(phb->opal_id,
-+ pe->pe_number,
-+ pe->pe_number << 1,
-+ 1,
-+ __pa(tbl->it_base),
-+ tbl->it_size << 3,
-+ IOMMU_PAGE_SIZE(tbl));
-+ if (rc) {
-+ pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
-+ return rc;
-+ }
-+
-+ pnv_pci_link_table_and_group(phb->hose->node, num,
-+ tbl, &pe->table_group);
-+ pnv_pci_ioda2_tce_invalidate_entire(pe);
-+
-+ return 0;
-+}
-+
- static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
- {
- uint16_t window_id = (pe->pe_number << 1 ) + 1;
-@@ -2123,21 +2160,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- pe->table_group.ops = &pnv_pci_ioda2_ops;
- #endif
-
-- /*
-- * Map TCE table through TVT. The TVE index is the PE number
-- * shifted by 1 bit for 32-bits DMA space.
-- */
-- rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
-- pe->pe_number << 1, 1, __pa(tbl->it_base),
-- tbl->it_size << 3, 1ULL << tbl->it_page_shift);
-+ rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
- if (rc) {
- pe_err(pe, "Failed to configure 32-bit TCE table,"
- " err %ld\n", rc);
- goto fail;
++ if (iommu->ops && iommu->ops->set_ownership) {
++ iommu->ops->set_ownership(iommu, true);
+ } else {
+- iommu = iommu_group_get_iommudata(iommu_group);
+- if (WARN_ON_ONCE(!iommu)) {
+- ret = -ENXIO;
+- } else if (iommu->ops && iommu->ops->set_ownership) {
+- /*
+- * Disable iommu bypass, otherwise the user can DMA to all of
+- * our physical memory via the bypass window instead of just
+- * the pages that has been explicitly mapped into the iommu
+- */
+- struct iommu_table tbltmp = { 0 }, *tbl = &tbltmp;
+-
+- iommu->ops->set_ownership(iommu, true);
+- container->grp = iommu_group;
+-
+- ret = iommu->ops->create_table(iommu, 0,
+- IOMMU_PAGE_SHIFT_4K,
+- ilog2(iommu->tce32_size), 1, tbl);
+- if (!ret)
+- ret = iommu->ops->set_window(iommu, 0, tbl);
+- } else {
+- ret = -ENODEV;
+- }
++ ret = -ENODEV;
++ goto unlock_exit;
}
-- pnv_pci_ioda2_tce_invalidate_entire(pe);
--
- /* OPAL variant of PHB3 invalidated TCEs */
- if (phb->ioda.tce_inval_reg)
- tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
++ container->grp = iommu_group;
++
++ /* Create the default window now that its parameters are known */
++ ret = iommu->ops->create_table(iommu, 0,
++ IOMMU_PAGE_SHIFT_4K,
++ ilog2(iommu->tce32_size), 1, tbl);
++ if (!ret)
++ ret = iommu->ops->set_window(iommu, 0, tbl);
++
++unlock_exit:
+ mutex_unlock(&container->lock);
+
+ return ret;
+@@ -845,6 +855,7 @@ static void tce_iommu_detach_group(void *iommu_data,
+ {
+ struct tce_container *container = iommu_data;
+ struct powerpc_iommu *iommu;
++ long i;
+
+ mutex_lock(&container->lock);
+ if (iommu_group != container->grp) {
+@@ -865,6 +876,9 @@ static void tce_iommu_detach_group(void *iommu_data,
+ iommu = iommu_group_get_iommudata(iommu_group);
+ BUG_ON(!iommu);
+
++ for (i = 0; i < POWERPC_IOMMU_MAX_TABLES; ++i)
++ iommu->ops->unset_window(iommu, i);
++
+ /* Kernel owns the device now, we can restore bypass */
+ if (iommu->ops && iommu->ops->set_ownership)
+ iommu->ops->set_ownership(iommu, false);
--
-2.4.0.rc3.8.gfb3e7d5
+2.0.0