--- v4
+++ v10
@@ -1,114 +1,96 @@
+This is a part of moving DMA window programming to an iommu_ops
+callback. pnv_pci_ioda2_set_window() takes an iommu_table_group as
+its first parameter (not a pnv_ioda_pe) as it is going to be used as
+a callback for the VFIO DDW code.
+
+This moves the pnv_pci_ioda2_tvt_invalidate() call into the new helper so
+the TVT is invalidated once a window is set. It has no immediate effect as
+the table is never recreated after reboot, but will in following patches.
+
+This should cause no behavioural change.
+
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
- drivers/vfio/vfio_iommu_spapr_tce.c | 62 +++++++++++++++++++++++--------------
- 1 file changed, 38 insertions(+), 24 deletions(-)
+Changes:
+v9:
+* initialize pe->table_group.tables[0] at the very end when
+tbl is fully initialized
+* moved the pnv_pci_ioda2_tvt_invalidate() call from an earlier patch
+---
+ arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++++++++++++++++++++------
+ 1 file changed, 38 insertions(+), 9 deletions(-)
-diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index fdcc04c..4ff8289 100644
---- a/drivers/vfio/vfio_iommu_spapr_tce.c
-+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
-@@ -435,7 +435,7 @@ static void tce_iommu_release(void *iommu_data)
- iommu = iommu_group_get_iommudata(container->grp);
- tbl = &iommu->tables[0];
- tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
--
-+ iommu->ops->free_table(tbl);
- tce_iommu_detach_group(iommu_data, container->grp);
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index 7d98d83..85f80b2 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -1983,6 +1983,43 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
+ }
+ }
+
++static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
++ int num, struct iommu_table *tbl)
++{
++ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
++ table_group);
++ struct pnv_phb *phb = pe->phb;
++ int64_t rc;
++ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
++ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
++
++ pe_info(pe, "Setting up window %llx..%llx pg=%x\n",
++ start_addr, start_addr + win_size - 1,
++ 1UL << tbl->it_page_shift);
++
++ /*
++ * Map TCE table through TVT. The TVE index is the PE number
++ * shifted by 1 bit for 32-bits DMA space.
++ */
++ rc = opal_pci_map_pe_dma_window(phb->opal_id,
++ pe->pe_number,
++ pe->pe_number << 1,
++ 1,
++ __pa(tbl->it_base),
++ tbl->it_size << 3,
++ 1ULL << tbl->it_page_shift);
++ if (rc) {
++ pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
++ return rc;
++ }
++
++ pnv_pci_link_table_and_group(phb->hose->node, num,
++ tbl, &pe->table_group);
++ pnv_pci_ioda2_tvt_invalidate(pe);
++
++ return 0;
++}
++
+ static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+ {
+ uint16_t window_id = (pe->pe_number << 1 ) + 1;
+@@ -2127,21 +2164,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+ #endif
+
+- /*
+- * Map TCE table through TVT. The TVE index is the PE number
+- * shifted by 1 bit for 32-bits DMA space.
+- */
+- rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+- pe->pe_number << 1, 1, __pa(tbl->it_base),
+- tbl->it_size << 3, 1ULL << tbl->it_page_shift);
++ rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+ if (rc) {
+ pe_err(pe, "Failed to configure 32-bit TCE table,"
+ " err %ld\n", rc);
+ goto fail;
}
-@@ -796,6 +796,7 @@ static int tce_iommu_attach_group(void *iommu_data,
- int ret = 0;
- struct tce_container *container = iommu_data;
- struct powerpc_iommu *iommu;
-+ struct iommu_table tbltmp = { 0 }, *tbl = &tbltmp;
-
- mutex_lock(&container->lock);
-
-@@ -806,35 +807,44 @@ static int tce_iommu_attach_group(void *iommu_data,
- iommu_group_id(container->grp),
- iommu_group_id(iommu_group));
- ret = -EBUSY;
-- } else if (container->enabled) {
-+ goto unlock_exit;
-+ }
-+
-+ if (container->enabled) {
- pr_err("tce_vfio: attaching group #%u to enabled container\n",
- iommu_group_id(iommu_group));
- ret = -EBUSY;
-+ goto unlock_exit;
-+ }
-+
-+ iommu = iommu_group_get_iommudata(iommu_group);
-+ if (WARN_ON_ONCE(!iommu)) {
-+ ret = -ENXIO;
-+ goto unlock_exit;
-+ }
-+
-+ /*
-+ * Disable iommu bypass, otherwise the user can DMA to all of
-+ * our physical memory via the bypass window instead of just
-+ * the pages that has been explicitly mapped into the iommu
-+ */
-+ if (iommu->ops && iommu->ops->set_ownership) {
-+ iommu->ops->set_ownership(iommu, true);
- } else {
-- iommu = iommu_group_get_iommudata(iommu_group);
-- if (WARN_ON_ONCE(!iommu)) {
-- ret = -ENXIO;
-- } else if (iommu->ops && iommu->ops->set_ownership) {
-- /*
-- * Disable iommu bypass, otherwise the user can DMA to all of
-- * our physical memory via the bypass window instead of just
-- * the pages that has been explicitly mapped into the iommu
-- */
-- struct iommu_table tbltmp = { 0 }, *tbl = &tbltmp;
+- pnv_pci_ioda2_tvt_invalidate(pe);
-
-- iommu->ops->set_ownership(iommu, true);
-- container->grp = iommu_group;
--
-- ret = iommu->ops->create_table(iommu, 0,
-- IOMMU_PAGE_SHIFT_4K,
-- ilog2(iommu->tce32_size), 1, tbl);
-- if (!ret)
-- ret = iommu->ops->set_window(iommu, 0, tbl);
-- } else {
-- ret = -ENODEV;
-- }
-+ ret = -ENODEV;
-+ goto unlock_exit;
- }
-
-+ container->grp = iommu_group;
-+
-+ /* Create the default window as only now we know the parameters */
-+ ret = iommu->ops->create_table(iommu, 0,
-+ IOMMU_PAGE_SHIFT_4K,
-+ ilog2(iommu->tce32_size), 1, tbl);
-+ if (!ret)
-+ ret = iommu->ops->set_window(iommu, 0, tbl);
-+
-+unlock_exit:
- mutex_unlock(&container->lock);
-
- return ret;
-@@ -845,6 +855,7 @@ static void tce_iommu_detach_group(void *iommu_data,
- {
- struct tce_container *container = iommu_data;
- struct powerpc_iommu *iommu;
-+ long i;
-
- mutex_lock(&container->lock);
- if (iommu_group != container->grp) {
-@@ -865,6 +876,9 @@ static void tce_iommu_detach_group(void *iommu_data,
- iommu = iommu_group_get_iommudata(iommu_group);
- BUG_ON(!iommu);
-
-+ for (i = 0; i < POWERPC_IOMMU_MAX_TABLES; ++i)
-+ iommu->ops->unset_window(iommu, i);
-+
- /* Kernel owns the device now, we can restore bypass */
- if (iommu->ops && iommu->ops->set_ownership)
- iommu->ops->set_ownership(iommu, false);
+ /* OPAL variant of PHB3 invalidated TCEs */
+ if (pe->tce_inval_reg)
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
--
-2.0.0
+2.4.0.rc3.8.gfb3e7d5