Inter-revision diff: patch 5

Comparing v5 (message) to v10 (message)

--- v5
+++ v10
@@ -1,153 +1,29 @@
-This moves locked pages accounting to helpers.
-Later they will be reused for Dynamic DMA windows (DDW).
+At the moment iommu_free_table() only releases memory if
+the table was initialized for the platform code use, i.e. it had
+it_map initialized (whose purpose is to track DMA memory space use).
 
-This reworks debug messages to show the current value and the limit.
-
-This stores the number of locked pages in the container so that, when
-unlocking, the iommu table pointer won't be needed. This does not have
-an effect now, but it will with multiple tables per container, as then
-we will allow attaching/detaching groups on the fly and we may end up
-having a container with no group attached but with the counter incremented.
-
-While we are here, update the comment explaining why RLIMIT_MEMLOCK
-might be required to be bigger than the guest RAM. This also prints
-pid of the current process in pr_warn/pr_debug.
+With dynamic DMA windows, we will need to be able to release
+iommu_table even if it was used for VFIO, in which case it_map is NULL,
+so this patch makes iommu_free_table() free the table in that case too.
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
 ---
-Changes:
-v4:
-* new helpers do nothing if @npages == 0
-* tce_iommu_disable() now can decrement the counter if the group was
-detached (not possible now but will be in the future)
----
- drivers/vfio/vfio_iommu_spapr_tce.c | 82 ++++++++++++++++++++++++++++---------
- 1 file changed, 63 insertions(+), 19 deletions(-)
+ arch/powerpc/kernel/iommu.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
 
-diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 0e37400..432a0de 100644
---- a/drivers/vfio/vfio_iommu_spapr_tce.c
-+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
-@@ -31,6 +31,51 @@
- static void tce_iommu_detach_group(void *iommu_data,
- 		struct iommu_group *iommu_group);
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index 3d47eb3..2c02d4c 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -714,8 +714,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+ 	unsigned int order;
  
-+static long try_increment_locked_vm(long npages)
-+{
-+	long ret = 0, locked, lock_limit;
-+
-+	if (!current || !current->mm)
-+		return -ESRCH; /* process exited */
-+
-+	if (!npages)
-+		return 0;
-+
-+	down_write(&current->mm->mmap_sem);
-+	locked = current->mm->locked_vm + npages;
-+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-+	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-+		ret = -ENOMEM;
-+	else
-+		current->mm->locked_vm += npages;
-+
-+	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
-+			npages << PAGE_SHIFT,
-+			current->mm->locked_vm << PAGE_SHIFT,
-+			rlimit(RLIMIT_MEMLOCK),
-+			ret ? " - exceeded" : "");
-+
-+	up_write(&current->mm->mmap_sem);
-+
-+	return ret;
-+}
-+
-+static void decrement_locked_vm(long npages)
-+{
-+	if (!current || !current->mm || !npages)
-+		return; /* process exited */
-+
-+	down_write(&current->mm->mmap_sem);
-+	if (npages > current->mm->locked_vm)
-+		npages = current->mm->locked_vm;
-+	current->mm->locked_vm -= npages;
-+	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
-+			npages << PAGE_SHIFT,
-+			current->mm->locked_vm << PAGE_SHIFT,
-+			rlimit(RLIMIT_MEMLOCK));
-+	up_write(&current->mm->mmap_sem);
-+}
-+
- /*
-  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
-  *
-@@ -47,6 +92,7 @@ struct tce_container {
- 	struct mutex lock;
- 	struct iommu_table *tbl;
- 	bool enabled;
-+	unsigned long locked_pages;
- };
+ 	if (!tbl || !tbl->it_map) {
+-		printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
+-				node_name);
++		kfree(tbl);
+ 		return;
+ 	}
  
- static bool tce_page_is_contained(struct page *page, unsigned page_shift)
-@@ -68,7 +114,7 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
- static int tce_iommu_enable(struct tce_container *container)
- {
- 	int ret = 0;
--	unsigned long locked, lock_limit, npages;
-+	unsigned long locked;
- 	struct iommu_table *tbl = container->tbl;
- 
- 	if (!container->tbl)
-@@ -97,21 +143,22 @@ static int tce_iommu_enable(struct tce_container *container)
- 	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
- 	 * that would effectively kill the guest at random points, much better
- 	 * enforcing the limit based on the max that the guest can map.
-+	 *
-+	 * Unfortunately at the moment it counts whole tables, no matter how
-+	 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
-+	 * each with 2GB DMA window, 8GB will be counted here. The reason for
-+	 * this is that we cannot tell here the amount of RAM used by the guest
-+	 * as this information is only available from KVM and VFIO is
-+	 * KVM agnostic.
- 	 */
--	down_write(&current->mm->mmap_sem);
--	npages = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
--	locked = current->mm->locked_vm + npages;
--	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
--	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
--		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
--				rlimit(RLIMIT_MEMLOCK));
--		ret = -ENOMEM;
--	} else {
-+	locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
-+	ret = try_increment_locked_vm(locked);
-+	if (ret)
-+		return ret;
- 
--		current->mm->locked_vm += npages;
--		container->enabled = true;
--	}
--	up_write(&current->mm->mmap_sem);
-+	container->locked_pages = locked;
-+
-+	container->enabled = true;
- 
- 	return ret;
- }
-@@ -123,13 +170,10 @@ static void tce_iommu_disable(struct tce_container *container)
- 
- 	container->enabled = false;
- 
--	if (!container->tbl || !current->mm)
-+	if (!current->mm)
- 		return;
- 
--	down_write(&current->mm->mmap_sem);
--	current->mm->locked_vm -= (container->tbl->it_size <<
--			container->tbl->it_page_shift) >> PAGE_SHIFT;
--	up_write(&current->mm->mmap_sem);
-+	decrement_locked_vm(container->locked_pages);
- }
- 
- static void *tce_iommu_open(unsigned long arg)
 -- 
-2.0.0
+2.4.0.rc3.8.gfb3e7d5
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help