--- v12
+++ v4
@@ -1,173 +1,78 @@
-This is a part of moving TCE table allocation into an iommu_ops
-callback to support multiple IOMMU groups per one VFIO container.
-
-This moves the code which allocates the actual TCE tables to helpers:
-pnv_pci_ioda2_table_alloc_pages() and pnv_pci_ioda2_table_free_pages().
-These do not allocate/free the iommu_table struct.
-
-This enforces window size to be a power of two.
-
-This should cause no behavioural change.
+This uses new helpers to remove the default TCE table if the ownership is
+being taken and create it otherwise. So once an external user (such as
+VFIO) has obtained ownership over a group, it does not have any DMA
+windows, neither default 32-bit nor bypass window. The external user is
+expected to unprogram DMA windows on PHBs before returning ownership
+back to the kernel.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
-Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
-Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
-Changes:
-v10:
-* removed @table_group parameter from pnv_pci_create_table as it was not used
-* removed *tce_table_allocated from pnv_alloc_tce_table_pages()
-* pnv_pci_create_table/pnv_pci_free_table renamed to
-pnv_pci_ioda2_table_alloc_pages/pnv_pci_ioda2_table_free_pages and moved
-back to pci-ioda.c as these only allocate pages for IODA2 and there is
-no chance they will be reused for IODA1/P5IOC2
-* shortened subject line
-
-v9:
-* moved helpers to the common powernv pci.c file from pci-ioda.c
-* moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
----
- arch/powerpc/platforms/powernv/pci-ioda.c | 83 +++++++++++++++++++++++--------
- 1 file changed, 63 insertions(+), 20 deletions(-)
+ arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++++++++++++++++++++++++++----
+ drivers/vfio/vfio_iommu_spapr_tce.c | 8 ++++++++
+ 2 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index 95d3121..38d53dc 100644
+index 16ddaba..79a8149 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -40,6 +40,7 @@
- #include <asm/debug.h>
- #include <asm/firmware.h>
- #include <asm/pnv-pci.h>
-+#include <asm/mmzone.h>
+@@ -1570,11 +1570,33 @@ static void pnv_ioda2_set_ownership(struct powerpc_iommu *iommu,
+ {
+ struct pnv_ioda_pe *pe = container_of(iommu, struct pnv_ioda_pe,
+ iommu);
+- if (enable)
+- iommu_take_ownership(iommu);
+- else
+- iommu_release_ownership(iommu);
++ if (enable) {
++ pnv_pci_ioda2_unset_window(&pe->iommu, 0);
++ pnv_pci_ioda2_free_table(&pe->iommu.tables[0]);
++ } else {
++ struct iommu_table *tbl = &pe->iommu.tables[0];
++ int64_t rc;
- #include <misc/cxl.h>
-
-@@ -49,6 +50,8 @@
- /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
- #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
-
-+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
++ rc = pnv_pci_ioda2_create_table(&pe->iommu, 0,
++ IOMMU_PAGE_SHIFT_4K,
++ ilog2(pe->phb->ioda.m32_pci_base),
++ POWERPC_IOMMU_DEFAULT_LEVELS, tbl);
++ if (rc) {
++ pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
++ rc);
++ return;
++ }
+
- static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
- const char *fmt, ...)
- {
-@@ -1313,8 +1316,8 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
- iommu_group_put(pe->table_group.group);
- BUG_ON(pe->table_group.group);
- }
-+ pnv_pci_ioda2_table_free_pages(tbl);
- iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
-- free_pages(addr, get_order(TCE32_TABLE_SIZE));
++ iommu_init_table(tbl, pe->phb->hose->node);
++
++ rc = pnv_pci_ioda2_set_window(&pe->iommu, 0, tbl);
++ if (rc) {
++ pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
++ rc);
++ pnv_pci_ioda2_free_table(tbl);
++ return;
++ }
++ }
+ pnv_pci_ioda2_set_bypass(pe, !enable);
}
- static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
-@@ -2032,13 +2035,62 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
- phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8);
- }
-
--static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-- struct pnv_ioda_pe *pe)
-+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift)
- {
- struct page *tce_mem = NULL;
-+ __be64 *addr;
-+ unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
+index b5134b7..fdcc04c 100644
+--- a/drivers/vfio/vfio_iommu_spapr_tce.c
++++ b/drivers/vfio/vfio_iommu_spapr_tce.c
+@@ -820,8 +820,16 @@ static int tce_iommu_attach_group(void *iommu_data,
+ * our physical memory via the bypass window instead of just
+ * the pages that has been explicitly mapped into the iommu
+ */
++ struct iommu_table tbltmp = { 0 }, *tbl = &tbltmp;
+
-+ tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
-+ if (!tce_mem) {
-+ pr_err("Failed to allocate a TCE memory, order=%d\n", order);
-+ return NULL;
-+ }
-+ addr = page_address(tce_mem);
-+ memset(addr, 0, 1UL << (order + PAGE_SHIFT));
+ iommu->ops->set_ownership(iommu, true);
+ container->grp = iommu_group;
+
-+ return addr;
-+}
-+
-+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
-+ __u32 page_shift, __u64 window_size, struct iommu_table *tbl)
-+{
- void *addr;
-+ const unsigned window_shift = ilog2(window_size);
-+ unsigned entries_shift = window_shift - page_shift;
-+ unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
-+ const unsigned long tce_table_size = 1UL << table_shift;
-+
-+ if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
-+ return -EINVAL;
-+
-+ /* Allocate TCE table */
-+ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, table_shift);
-+ if (!addr)
-+ return -ENOMEM;
-+
-+ /* Setup linux iommu table */
-+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
-+ page_shift);
-+
-+ pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
-+ window_size, tce_table_size, bus_offset);
-+
-+ return 0;
-+}
-+
-+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
-+{
-+ if (!tbl->it_size)
-+ return;
-+
-+ free_pages(tbl->it_base, get_order(tbl->it_size << 3));
-+}
-+
-+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-+ struct pnv_ioda_pe *pe)
-+{
- struct iommu_table *tbl;
-- unsigned int tce_table_size, end;
- int64_t rc;
-
- /* We shouldn't already have a 32-bit DMA associated */
-@@ -2055,24 +2107,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-
- /* The PE will reserve all possible 32-bits space */
- pe->tce32_seg = 0;
-- end = (1 << ilog2(phb->ioda.m32_pci_base));
-- tce_table_size = (end / 0x1000) * 8;
- pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
-- end);
-+ phb->ioda.m32_pci_base);
-
-- /* Allocate TCE table */
-- tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
-- get_order(tce_table_size));
-- if (!tce_mem) {
-- pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
-+ /* Setup linux iommu table */
-+ rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node,
-+ 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, tbl);
-+ if (rc) {
-+ pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc);
- goto fail;
- }
-- addr = page_address(tce_mem);
-- memset(addr, 0, tce_table_size);
--
-- /* Setup linux iommu table */
-- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
-- IOMMU_PAGE_SHIFT_4K);
-
- tbl->it_ops = &pnv_ioda2_iommu_ops;
- iommu_init_table(tbl, phb->hose->node);
-@@ -2118,9 +2162,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- fail:
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
-- if (tce_mem)
-- __free_pages(tce_mem, get_order(tce_table_size));
- if (tbl) {
-+ pnv_pci_ioda2_table_free_pages(tbl);
- pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
- iommu_free_table(tbl, "pnv");
- }
++ ret = iommu->ops->create_table(iommu, 0,
++ IOMMU_PAGE_SHIFT_4K,
++ ilog2(iommu->tce32_size), 1, tbl);
++ if (!ret)
++ ret = iommu->ops->set_window(iommu, 0, tbl);
+ } else {
+ ret = -ENODEV;
+ }
--
-2.4.0.rc3.8.gfb3e7d5
+2.0.0