--- v1
+++ v4
@@ -1,121 +1,107 @@
-This moves locked pages accounting to helpers.
-Later they will be reused for Dynamic DMA windows (DDW).
+This replaces multiple calls of kzalloc_node() with a new
+iommu_table_alloc() helper. Right now it calls kzalloc_node() but
+later it will be modified to allocate a powerpc_iommu struct with
+a single iommu_table in it.
-While we are here, update the comment explaining why RLIMIT_MEMLOCK
-might be required to be bigger than the guest RAM.
+Later the helper will allocate a powerpc_iommu struct which embeds
+the iommu table(s).
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
- drivers/vfio/vfio_iommu_spapr_tce.c | 71 +++++++++++++++++++++++++++----------
- 1 file changed, 53 insertions(+), 18 deletions(-)
+ arch/powerpc/include/asm/iommu.h | 1 +
+ arch/powerpc/kernel/iommu.c | 9 +++++++++
+ arch/powerpc/platforms/powernv/pci.c | 2 +-
+ arch/powerpc/platforms/pseries/iommu.c | 12 ++++--------
+ 4 files changed, 15 insertions(+), 9 deletions(-)
-diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 1c1a9c4..c9fac97 100644
---- a/drivers/vfio/vfio_iommu_spapr_tce.c
-+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
-@@ -29,6 +29,46 @@
- static void tce_iommu_detach_group(void *iommu_data,
- struct iommu_group *iommu_group);
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index eb5822d..335e3d4 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -117,6 +117,7 @@ static inline void *get_iommu_table_base(struct device *dev)
+ return dev->archdata.dma_data.iommu_table_base;
+ }
-+static long try_increment_locked_vm(struct iommu_table *tbl)
++extern struct iommu_table *iommu_table_alloc(int node);
+ /* Frees table for an individual device node */
+ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index c51ad3e..2f7e92b 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -710,6 +710,15 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+ return tbl;
+ }
+
++struct iommu_table *iommu_table_alloc(int node)
+{
-+ long ret = 0, locked, lock_limit, npages;
++ struct iommu_table *tbl;
+
-+ if (!current || !current->mm)
-+ return -ESRCH; /* process exited */
++ tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
+
-+ npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-+
-+ down_write(&current->mm->mmap_sem);
-+ locked = current->mm->locked_vm + npages;
-+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-+ pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
-+ rlimit(RLIMIT_MEMLOCK));
-+ ret = -ENOMEM;
-+ } else {
-+ current->mm->locked_vm += npages;
-+ }
-+ up_write(&current->mm->mmap_sem);
-+
-+ return ret;
++ return tbl;
+}
+
-+static void decrement_locked_vm(struct iommu_table *tbl)
-+{
-+ long npages;
-+
-+ if (!current || !current->mm)
-+ return; /* process exited */
-+
-+ npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-+
-+ down_write(&current->mm->mmap_sem);
-+ if (npages > current->mm->locked_vm)
-+ npages = current->mm->locked_vm;
-+ current->mm->locked_vm -= npages;
-+ up_write(&current->mm->mmap_sem);
-+}
-+
- /*
- * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
- *
-@@ -86,7 +126,6 @@ static void tce_iommu_take_ownership_notify(struct spapr_tce_iommu_group *data,
- static int tce_iommu_enable(struct tce_container *container)
+ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
{
- int ret = 0;
-- unsigned long locked, lock_limit, npages;
- struct iommu_table *tbl;
- struct spapr_tce_iommu_group *data;
+ unsigned long bitmap_sz;
+diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
+index c4782b1..bbe529b 100644
+--- a/arch/powerpc/platforms/powernv/pci.c
++++ b/arch/powerpc/platforms/powernv/pci.c
+@@ -693,7 +693,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
+ hose->dn->full_name);
+ return NULL;
+ }
+- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
++ tbl = iommu_table_alloc(hose->node);
+ if (WARN_ON(!tbl))
+ return NULL;
+ pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index 1aa1815..bc14299 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -617,8 +617,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
+ pci->phb->dma_window_size = 0x8000000ul;
+ pci->phb->dma_window_base_cur = 0x8000000ul;
-@@ -120,24 +159,23 @@ static int tce_iommu_enable(struct tce_container *container)
- * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
- * that would effectively kill the guest at random points, much better
- * enforcing the limit based on the max that the guest can map.
-+ *
-+ * Unfortunately at the moment it counts whole tables, no matter how
-+ * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
-+ * each with 2GB DMA window, 8GB will be counted here. The reason for
-+ * this is that we cannot tell here the amount of RAM used by the guest
-+ * as this information is only available from KVM and VFIO is
-+ * KVM agnostic.
- */
- tbl = data->ops->get_table(data, 0);
- if (!tbl)
- return -ENXIO;
+- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+- pci->phb->node);
++ tbl = iommu_table_alloc(pci->phb->node);
-- down_write(&current->mm->mmap_sem);
-- npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-- locked = current->mm->locked_vm + npages;
-- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-- if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-- pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
-- rlimit(RLIMIT_MEMLOCK));
-- ret = -ENOMEM;
-- } else {
-- current->mm->locked_vm += npages;
-- container->enabled = true;
-- }
-- up_write(&current->mm->mmap_sem);
-+ ret = try_increment_locked_vm(tbl);
-+ if (ret)
-+ return ret;
-+
-+ container->enabled = true;
+ iommu_table_setparms(pci->phb, dn, tbl);
+ tbl->it_ops = &iommu_table_pseries_ops;
+@@ -669,8 +668,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+ pdn->full_name, ppci->iommu_table);
- return ret;
- }
-@@ -163,10 +201,7 @@ static void tce_iommu_disable(struct tce_container *container)
- if (!tbl)
- return;
+ if (!ppci->iommu_table) {
+- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+- ppci->phb->node);
++ tbl = iommu_table_alloc(ppci->phb->node);
+ iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
+@@ -697,8 +695,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
+ struct pci_controller *phb = PCI_DN(dn)->phb;
-- down_write(&current->mm->mmap_sem);
-- current->mm->locked_vm -= (tbl->it_size <<
-- IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-- up_write(&current->mm->mmap_sem);
-+ decrement_locked_vm(tbl);
- }
+ pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
+- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+- phb->node);
++ tbl = iommu_table_alloc(phb->node);
+ iommu_table_setparms(phb, dn, tbl);
+ tbl->it_ops = &iommu_table_pseries_ops;
+ PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
+@@ -1120,8 +1117,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
- static void *tce_iommu_open(unsigned long arg)
+ pci = PCI_DN(pdn);
+ if (!pci->iommu_table) {
+- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+- pci->phb->node);
++ tbl = iommu_table_alloc(pci->phb->node);
+ iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
--
2.0.0