Inter-revision diff: patch 11

Comparing v1 (message) to v4 (message)

--- v1
+++ v4
@@ -1,121 +1,107 @@
-This moves locked pages accounting to helpers.
-Later they will be reused for Dynamic DMA windows (DDW).
+This replaces multiple calls of kzalloc_node() with a new
+iommu_table_alloc() helper. Right now it calls kzalloc_node() but
+later it will be modified to allocate a powerpc_iommu struct with
+a single iommu_table in it.
 
-While we are here, update the comment explaining why RLIMIT_MEMLOCK
-might be required to be bigger than the guest RAM.
+Later the helper will allocate a powerpc_iommu struct which embeds
+the iommu table(s).
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
 ---
- drivers/vfio/vfio_iommu_spapr_tce.c | 71 +++++++++++++++++++++++++++----------
- 1 file changed, 53 insertions(+), 18 deletions(-)
+ arch/powerpc/include/asm/iommu.h       |  1 +
+ arch/powerpc/kernel/iommu.c            |  9 +++++++++
+ arch/powerpc/platforms/powernv/pci.c   |  2 +-
+ arch/powerpc/platforms/pseries/iommu.c | 12 ++++--------
+ 4 files changed, 15 insertions(+), 9 deletions(-)
 
-diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 1c1a9c4..c9fac97 100644
---- a/drivers/vfio/vfio_iommu_spapr_tce.c
-+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
-@@ -29,6 +29,46 @@
- static void tce_iommu_detach_group(void *iommu_data,
- 		struct iommu_group *iommu_group);
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index eb5822d..335e3d4 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -117,6 +117,7 @@ static inline void *get_iommu_table_base(struct device *dev)
+ 	return dev->archdata.dma_data.iommu_table_base;
+ }
  
-+static long try_increment_locked_vm(struct iommu_table *tbl)
++extern struct iommu_table *iommu_table_alloc(int node);
+ /* Frees table for an individual device node */
+ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+ 
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index c51ad3e..2f7e92b 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -710,6 +710,15 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+ 	return tbl;
+ }
+ 
++struct iommu_table *iommu_table_alloc(int node)
 +{
-+	long ret = 0, locked, lock_limit, npages;
++	struct iommu_table *tbl;
 +
-+	if (!current || !current->mm)
-+		return -ESRCH; /* process exited */
++	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
 +
-+	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-+
-+	down_write(&current->mm->mmap_sem);
-+	locked = current->mm->locked_vm + npages;
-+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-+	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-+		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
-+				rlimit(RLIMIT_MEMLOCK));
-+		ret = -ENOMEM;
-+	} else {
-+		current->mm->locked_vm += npages;
-+	}
-+	up_write(&current->mm->mmap_sem);
-+
-+	return ret;
++	return tbl;
 +}
 +
-+static void decrement_locked_vm(struct iommu_table *tbl)
-+{
-+	long npages;
-+
-+	if (!current || !current->mm)
-+		return; /* process exited */
-+
-+	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-+
-+	down_write(&current->mm->mmap_sem);
-+	if (npages > current->mm->locked_vm)
-+		npages = current->mm->locked_vm;
-+	current->mm->locked_vm -= npages;
-+	up_write(&current->mm->mmap_sem);
-+}
-+
- /*
-  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
-  *
-@@ -86,7 +126,6 @@ static void tce_iommu_take_ownership_notify(struct spapr_tce_iommu_group *data,
- static int tce_iommu_enable(struct tce_container *container)
+ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
  {
- 	int ret = 0;
--	unsigned long locked, lock_limit, npages;
- 	struct iommu_table *tbl;
- 	struct spapr_tce_iommu_group *data;
+ 	unsigned long bitmap_sz;
+diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
+index c4782b1..bbe529b 100644
+--- a/arch/powerpc/platforms/powernv/pci.c
++++ b/arch/powerpc/platforms/powernv/pci.c
+@@ -693,7 +693,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
+ 		       hose->dn->full_name);
+ 		return NULL;
+ 	}
+-	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
++	tbl = iommu_table_alloc(hose->node);
+ 	if (WARN_ON(!tbl))
+ 		return NULL;
+ 	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index 1aa1815..bc14299 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -617,8 +617,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
+ 	pci->phb->dma_window_size = 0x8000000ul;
+ 	pci->phb->dma_window_base_cur = 0x8000000ul;
  
-@@ -120,24 +159,23 @@ static int tce_iommu_enable(struct tce_container *container)
- 	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
- 	 * that would effectively kill the guest at random points, much better
- 	 * enforcing the limit based on the max that the guest can map.
-+	 *
-+	 * Unfortunately at the moment it counts whole tables, no matter how
-+	 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
-+	 * each with 2GB DMA window, 8GB will be counted here. The reason for
-+	 * this is that we cannot tell here the amount of RAM used by the guest
-+	 * as this information is only available from KVM and VFIO is
-+	 * KVM agnostic.
- 	 */
- 	tbl = data->ops->get_table(data, 0);
- 	if (!tbl)
- 		return -ENXIO;
+-	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+-			   pci->phb->node);
++	tbl = iommu_table_alloc(pci->phb->node);
  
--	down_write(&current->mm->mmap_sem);
--	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
--	locked = current->mm->locked_vm + npages;
--	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
--	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
--		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
--				rlimit(RLIMIT_MEMLOCK));
--		ret = -ENOMEM;
--	} else {
--		current->mm->locked_vm += npages;
--		container->enabled = true;
--	}
--	up_write(&current->mm->mmap_sem);
-+	ret = try_increment_locked_vm(tbl);
-+	if (ret)
-+		return ret;
-+
-+	container->enabled = true;
+ 	iommu_table_setparms(pci->phb, dn, tbl);
+ 	tbl->it_ops = &iommu_table_pseries_ops;
+@@ -669,8 +668,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+ 		 pdn->full_name, ppci->iommu_table);
  
- 	return ret;
- }
-@@ -163,10 +201,7 @@ static void tce_iommu_disable(struct tce_container *container)
- 	if (!tbl)
- 		return;
+ 	if (!ppci->iommu_table) {
+-		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+-				   ppci->phb->node);
++		tbl = iommu_table_alloc(ppci->phb->node);
+ 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
+ 		tbl->it_ops = &iommu_table_lpar_multi_ops;
+ 		ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
+@@ -697,8 +695,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
+ 		struct pci_controller *phb = PCI_DN(dn)->phb;
  
--	down_write(&current->mm->mmap_sem);
--	current->mm->locked_vm -= (tbl->it_size <<
--			IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
--	up_write(&current->mm->mmap_sem);
-+	decrement_locked_vm(tbl);
- }
+ 		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
+-		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+-				   phb->node);
++		tbl = iommu_table_alloc(phb->node);
+ 		iommu_table_setparms(phb, dn, tbl);
+ 		tbl->it_ops = &iommu_table_pseries_ops;
+ 		PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
+@@ -1120,8 +1117,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
  
- static void *tce_iommu_open(unsigned long arg)
+ 	pci = PCI_DN(pdn);
+ 	if (!pci->iommu_table) {
+-		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
+-				   pci->phb->node);
++		tbl = iommu_table_alloc(pci->phb->node);
+ 		iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
+ 		tbl->it_ops = &iommu_table_lpar_multi_ops;
+ 		pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
 -- 
 2.0.0
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help