--- v9
+++ v4
@@ -1,168 +1,43 @@
-This reverts commit 9e8d4a19ab66ec9e132d405357b9108a4f26efd3 as
-tce32_table has exactly the same lifetime as the whole PE.
-
-This makes use of a new iommu_reset_table() helper instead.
+This clears the TCE table when a container is being closed, as it is
+good practice to leave the table clean before passing ownership
+back to the host kernel.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
- arch/powerpc/include/asm/iommu.h | 3 ---
- arch/powerpc/platforms/powernv/pci-ioda.c | 35 +++++++++++++------------------
- arch/powerpc/platforms/powernv/pci.h | 2 +-
- 3 files changed, 15 insertions(+), 25 deletions(-)
+ drivers/vfio/vfio_iommu_spapr_tce.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
-diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index e2cef38..9d320e0 100644
---- a/arch/powerpc/include/asm/iommu.h
-+++ b/arch/powerpc/include/asm/iommu.h
-@@ -79,9 +79,6 @@ struct iommu_table {
- struct iommu_group *it_group;
- #endif
- void (*set_bypass)(struct iommu_table *tbl, bool enable);
--#ifdef CONFIG_PPC_POWERNV
-- void *data;
--#endif
- };
-
- /* Pure 2^n version of get_order */
-diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index 920c252..eff26ed 100644
---- a/arch/powerpc/platforms/powernv/pci-ioda.c
-+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -1086,10 +1086,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
- return;
- }
-
-- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
-- GFP_KERNEL, hose->node);
-- pe->tce32_table->data = pe;
--
- /* Associate it with all child devices */
- pnv_ioda_setup_same_PE(bus, pe);
-
-@@ -1295,7 +1291,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
- bus = dev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
-- tbl = pe->tce32_table;
-+ tbl = &pe->tce32_table;
- addr = tbl->it_base;
-
- opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
-@@ -1310,9 +1306,8 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
- if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
-
-- iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
-+ iommu_reset_table(tbl, of_node_full_name(dev->dev.of_node));
- free_pages(addr, get_order(TCE32_TABLE_SIZE));
-- pe->tce32_table = NULL;
+diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
+index 1ef46c3..daf2e2c 100644
+--- a/drivers/vfio/vfio_iommu_spapr_tce.c
++++ b/drivers/vfio/vfio_iommu_spapr_tce.c
+@@ -134,16 +134,24 @@ static void *tce_iommu_open(unsigned long arg)
+ return container;
}
- static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
-@@ -1460,10 +1455,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
- continue;
- }
++static int tce_iommu_clear(struct tce_container *container,
++ struct iommu_table *tbl,
++ unsigned long entry, unsigned long pages);
++
+ static void tce_iommu_release(void *iommu_data)
+ {
+ struct tce_container *container = iommu_data;
++ struct iommu_table *tbl = container->tbl;
-- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
-- GFP_KERNEL, hose->node);
-- pe->tce32_table->data = pe;
--
- /* Put PE to the list */
- mutex_lock(&phb->ioda.pe_list_mutex);
- list_add_tail(&pe->list, &phb->ioda.pe_list);
-@@ -1598,7 +1589,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
+- WARN_ON(container->tbl && !container->tbl->it_group);
++ WARN_ON(tbl && !tbl->it_group);
+ tce_iommu_disable(container);
- pe = &phb->ioda.pe_array[pdn->pe_number];
- WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
-- set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
-+ set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
- }
+- if (container->tbl && container->tbl->it_group)
+- tce_iommu_detach_group(iommu_data, container->tbl->it_group);
++ if (tbl) {
++ tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
- static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
-@@ -1625,7 +1616,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
- } else {
- dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(&pdev->dev, &dma_iommu_ops);
-- set_iommu_table_base(&pdev->dev, pe->tce32_table);
-+ set_iommu_table_base(&pdev->dev, &pe->tce32_table);
- }
- *pdev->dev.dma_mask = dma_mask;
- return 0;
-@@ -1662,9 +1653,9 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- list_for_each_entry(dev, &bus->devices, bus_list) {
- if (add_to_iommu_group)
- set_iommu_table_base_and_group(&dev->dev,
-- pe->tce32_table);
-+ &pe->tce32_table);
- else
-- set_iommu_table_base(&dev->dev, pe->tce32_table);
-+ set_iommu_table_base(&dev->dev, &pe->tce32_table);
++ if (tbl->it_group)
++ tce_iommu_detach_group(iommu_data, tbl->it_group);
++ }
+ mutex_destroy(&container->lock);
- if (dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate,
-@@ -1754,7 +1745,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
- void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
- {
-- struct pnv_ioda_pe *pe = tbl->data;
-+ struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
-+ tce32_table);
- struct pnv_phb *phb = pe->phb;
-
- if (phb->type == PNV_PHB_IODA1)
-@@ -1817,7 +1809,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
- }
-
- /* Setup linux iommu table */
-- tbl = pe->tce32_table;
-+ tbl = &pe->tce32_table;
- pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
- base << 28, IOMMU_PAGE_SHIFT_4K);
-
-@@ -1862,7 +1854,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
-
- static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
- {
-- struct pnv_ioda_pe *pe = tbl->data;
-+ struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
-+ tce32_table);
- uint16_t window_id = (pe->pe_number << 1 ) + 1;
- int64_t rc;
-
-@@ -1907,10 +1900,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
- pe->tce_bypass_base = 1ull << 59;
-
- /* Install set_bypass callback for VFIO */
-- pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
-+ pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
-
- /* Enable bypass by default */
-- pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
-+ pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
- }
-
- static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-@@ -1958,7 +1951,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- }
-
- /* Setup linux iommu table */
-- tbl = pe->tce32_table;
-+ tbl = &pe->tce32_table;
- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
- IOMMU_PAGE_SHIFT_4K);
-
-diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
-index 070ee88..c954c64 100644
---- a/arch/powerpc/platforms/powernv/pci.h
-+++ b/arch/powerpc/platforms/powernv/pci.h
-@@ -57,7 +57,7 @@ struct pnv_ioda_pe {
- /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
- int tce32_seg;
- int tce32_segcount;
-- struct iommu_table *tce32_table;
-+ struct iommu_table tce32_table;
- phys_addr_t tce_inval_reg_phys;
-
- /* 64-bit TCE bypass region */
+ kfree(container);
--
2.0.0