Inter-revision diff: patch 29

Comparing v5 (message) to v7 (message)

--- v5
+++ v7
@@ -1,330 +1,421 @@
-This adds create/remove window ioctls to create and remove DMA windows.
-sPAPR defines a Dynamic DMA windows capability which allows
-para-virtualized guests to create additional DMA windows on a PCI bus.
-The existing linux kernels use this new window to map the entire guest
-memory and switch to the direct DMA operations saving time on map/unmap
-requests which would normally happen in large numbers.
-
-This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and
-VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows.
-Up to 2 windows are supported now by the hardware and by this driver.
-
-This changes the VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional
-information such as the number of supported windows and the maximum number
-of levels of TCE tables.
+The existing implementation accounts the whole DMA window in
+the locked_vm counter. This is going to be worse with multiple
+containers and huge DMA windows. Also, real-time accounting would require
+additional tracking of accounted pages due to the page size difference -
+IOMMU uses 4K pages and system uses 4K or 64K pages.
+
+Another issue is that actual pages pinning/unpinning happens on every
+DMA map/unmap request. This does not affect the performance much now as
+we spend way too much time now on switching context between
+guest/userspace/host but this will start to matter when we add in-kernel
+DMA map/unmap acceleration.
+
+This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU.
+New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces
+2 new ioctls to register/unregister DMA memory -
+VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY -
+which receive user space address and size of a memory region which
+needs to be pinned/unpinned and counted in locked_vm.
+New IOMMU splits physical pages pinning and TCE table update into 2 different
+operations. It requires 1) guest pages to be registered first 2) subsequent
+map/unmap requests to work only with pre-registered memory.
+For the default single window case this means that the entire guest
+(instead of 2GB) needs to be pinned before using VFIO.
+When a huge DMA window is added, no additional pinning will be
+required, otherwise it would be guest RAM + 2GB.
+
+The new memory registration ioctls are not supported by
+VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration
+will require memory to be preregistered in order to work.
+
+The accounting is done per the user process.
+
+This advertises v2 SPAPR TCE IOMMU and restricts what the userspace
+can do with v1 or v2 IOMMUs.
 
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
 ---
 Changes:
+v7:
+* now memory is registered per mm (i.e. process)
+* moved memory registration code to powerpc/mmu
+* merged "vfio: powerpc/spapr: Define v2 IOMMU" into this
+* limited new ioctls to v2 IOMMU
+* updated doc
+* unsupported ioctls return -ENOTTY instead of -EPERM
+
+v6:
+* tce_get_hva_cached() returns hva via a pointer
+
 v4:
-* moved code to tce_iommu_create_window()/tce_iommu_remove_window()
-helpers
-* added docs
+* updated docs
+* s/kzmalloc/vzalloc/
+* in tce_pin_pages()/tce_unpin_pages() removed @vaddr, @size and
+replaced offset with index
+* renamed vfio_iommu_type_register_memory to vfio_iommu_spapr_register_memory
+and removed duplicating vfio_iommu_spapr_register_memory
 ---
- Documentation/vfio.txt              |  19 +++++
- arch/powerpc/include/asm/iommu.h    |   2 +-
- drivers/vfio/vfio_iommu_spapr_tce.c | 165 +++++++++++++++++++++++++++++++++++-
- include/uapi/linux/vfio.h           |  24 +++++-
- 4 files changed, 207 insertions(+), 3 deletions(-)
-
-diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
-index 791e85c..61ce393 100644
---- a/Documentation/vfio.txt
-+++ b/Documentation/vfio.txt
-@@ -446,6 +446,25 @@ the memory block.
- The user space is not expected to call these often and the block descriptors
- are stored in a linked list in the kernel.
- 
 -+6) The sPAPR specification allows guests to have additional DMA window(s) on
-+a PCI bus with a variable page size. Two ioctls have been added to support
-+this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
-+The platform has to support the functionality or error will be returned to
-+the userspace. The existing hardware supports up to 2 DMA windows, one is
-+2GB long, uses 4K pages and called "default 32bit window"; the other can
-+be as big as entire RAM, use different page size, it is optional - guests
-+create those in run-time if the guest driver supports 64bit DMA.
-+
-+VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
-+a number of TCE table levels (if a TCE table is going to be big enough and
 -+the kernel may not be able to allocate enough physically contiguous memory).
-+It creates a new window in the available slot and returns the bus address where
-+the new window starts. Due to hardware limitation, the user space cannot choose
-+the location of DMA windows.
-+
-+VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
-+and removes it.
-+
- -------------------------------------------------------------------------------
- 
- [1] VFIO was originally an acronym for "Virtual Function I/O" in its
-diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index 04f72ac..de82b61 100644
---- a/arch/powerpc/include/asm/iommu.h
-+++ b/arch/powerpc/include/asm/iommu.h
-@@ -138,7 +138,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
- extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
- 					    int nid);
- 
--#define IOMMU_TABLE_GROUP_MAX_TABLES	1
-+#define IOMMU_TABLE_GROUP_MAX_TABLES	2
- 
- struct iommu_table_group;
- 
+ drivers/vfio/vfio_iommu_spapr_tce.c | 232 +++++++++++++++++++++++++++++++++++-
+ include/uapi/linux/vfio.h           |  27 +++++
+ 2 files changed, 253 insertions(+), 6 deletions(-)
+
 diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 3a0b5fe..7aa4141b 100644
+index 9aeaed6..5049b4f 100644
 --- a/drivers/vfio/vfio_iommu_spapr_tce.c
 +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
-@@ -96,6 +96,7 @@ struct tce_container {
- 	struct list_head mem_list;
- 	struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
- 	struct list_head group_list;
+@@ -21,6 +21,7 @@
+ #include <linux/vfio.h>
+ #include <asm/iommu.h>
+ #include <asm/tce.h>
++#include <asm/mmu_context.h>
+ 
+ #define DRIVER_VERSION  "0.1"
+ #define DRIVER_AUTHOR   "aik@ozlabs.ru"
+@@ -91,8 +92,58 @@ struct tce_container {
+ 	struct iommu_group *grp;
+ 	bool enabled;
+ 	unsigned long locked_pages;
 +	bool v2;
  };
  
- struct tce_iommu_group {
-@@ -333,6 +334,20 @@ static struct iommu_table *spapr_tce_find_table(
++static long tce_unregister_pages(struct tce_container *container,
++		__u64 vaddr, __u64 size)
++{
++	long ret;
++	mm_iommu_table_group_mem_t *mem;
++
++	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
++		return -EINVAL;
++
++	mem = mm_iommu_get(vaddr, size >> PAGE_SHIFT);
++	if (!mem)
++		return -EINVAL;
++
++	ret = mm_iommu_put(mem); /* undo kref_get() from mm_iommu_get() */
++	if (!ret)
++		ret = mm_iommu_put(mem);
++
++	return ret;
++}
++
++static long tce_register_pages(struct tce_container *container,
++		__u64 vaddr, __u64 size)
++{
++	long ret = 0;
++	mm_iommu_table_group_mem_t *mem;
++	unsigned long entries = size >> PAGE_SHIFT;
++
++	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
++			((vaddr + size) < vaddr))
++		return -EINVAL;
++
++	mem = mm_iommu_get(vaddr, entries);
++	if (!mem) {
++		ret = try_increment_locked_vm(entries);
++		if (ret)
++			return ret;
++
++		ret = mm_iommu_alloc(vaddr, entries, &mem);
++		if (ret) {
++			decrement_locked_vm(entries);
++			return ret;
++		}
++	}
++
++	container->enabled = true;
++
++	return 0;
++}
++
+ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+ {
+ 	/*
+@@ -205,7 +256,7 @@ static void *tce_iommu_open(unsigned long arg)
+ {
+ 	struct tce_container *container;
+ 
+-	if (arg != VFIO_SPAPR_TCE_IOMMU) {
++	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
+ 		pr_err("tce_vfio: Wrong IOMMU type\n");
+ 		return ERR_PTR(-EINVAL);
+ 	}
+@@ -215,6 +266,7 @@ static void *tce_iommu_open(unsigned long arg)
+ 		return ERR_PTR(-ENOMEM);
+ 
+ 	mutex_init(&container->lock);
++	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
+ 
+ 	return container;
+ }
+@@ -257,6 +309,49 @@ static void tce_iommu_unuse_page(struct tce_container *container,
+ 	put_page(page);
+ }
+ 
++static int tce_get_hva_cached(unsigned long tce, unsigned long size,
++		unsigned long *hva, mm_iommu_table_group_mem_t **pmem)
++{
++	long ret = 0;
++	unsigned long hpa;
++	mm_iommu_table_group_mem_t *mem;
++
++	mem = mm_iommu_lookup(tce, size);
++	if (!mem)
++		return -EINVAL;
++
++	ret = mm_iommu_ua_to_hpa(mem, tce, &hpa);
++	if (ret)
++		return -EINVAL;
++
++	*hva = (unsigned long) __va(hpa);
++	*pmem = mem;
++
++	return 0;
++}
++
++static void tce_iommu_unuse_page_v2(struct iommu_table *tbl,
++		unsigned long entry)
++{
++	mm_iommu_table_group_mem_t *mem = NULL;
++	int ret;
++	unsigned long hva = 0;
++	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++
++	if (!pua || !current || !current->mm)
++		return;
++
++	ret = tce_get_hva_cached(*pua, IOMMU_PAGE_SIZE(tbl),
++			&hva, &mem);
++	if (ret)
++		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
++				__func__, *pua, entry, ret);
++	if (mem)
++		mm_iommu_mapped_update(mem, false);
++
++	*pua = 0;
++}
++
+ static int tce_iommu_clear(struct tce_container *container,
+ 		struct iommu_table *tbl,
+ 		unsigned long entry, unsigned long pages)
+@@ -275,6 +370,11 @@ static int tce_iommu_clear(struct tce_container *container,
+ 		if (direction == DMA_NONE)
+ 			continue;
+ 
++		if (container->v2) {
++			tce_iommu_unuse_page_v2(tbl, entry);
++			continue;
++		}
++
+ 		tce_iommu_unuse_page(container, tce);
+ 	}
+ 
+@@ -342,6 +442,62 @@ static long tce_iommu_build(struct tce_container *container,
  	return ret;
  }
  
-+static int spapr_tce_find_free_table(struct tce_container *container)
++static long tce_iommu_build_v2(struct tce_container *container,
++		struct iommu_table *tbl,
++		unsigned long entry, unsigned long tce, unsigned long pages,
++		enum dma_data_direction direction)
 +{
-+	int i;
-+
-+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-+		struct iommu_table *tbl = &container->tables[i];
-+
-+		if (!tbl->it_size)
-+			return i;
++	long i, ret = 0;
++	struct page *page;
++	unsigned long hva;
++	enum dma_data_direction dirtmp;
++
++	for (i = 0; i < pages; ++i) {
++		mm_iommu_table_group_mem_t *mem = NULL;
++		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
++				entry + i);
++
++		ret = tce_get_hva_cached(tce, IOMMU_PAGE_SIZE(tbl),
++				&hva, &mem);
++		if (ret)
++			break;
++
++		page = pfn_to_page(__pa(hva) >> PAGE_SHIFT);
++		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
++			ret = -EPERM;
++			break;
++		}
++
++		/* Preserve offset within IOMMU page */
++		hva |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
++		dirtmp = direction;
++
++		ret = iommu_tce_xchg(tbl, entry + i, &hva, &dirtmp);
++		if (ret) {
++			/* dirtmp cannot be DMA_NONE here */
++			tce_iommu_unuse_page_v2(tbl, entry + i);
++			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
++					__func__, entry << tbl->it_page_shift,
++					tce, ret);
++			break;
++		}
++
++		mm_iommu_mapped_update(mem, true);
++
++		if (dirtmp != DMA_NONE)
++			tce_iommu_unuse_page_v2(tbl, entry + i);
++
++		*pua = tce;
++
++		tce += IOMMU_PAGE_SIZE(tbl);
 +	}
 +
-+	return -1;
-+}
-+
- static int tce_iommu_enable(struct tce_container *container)
- {
- 	int ret = 0;
-@@ -432,6 +447,8 @@ static void *tce_iommu_open(unsigned long arg)
- 	INIT_LIST_HEAD_RCU(&container->mem_list);
- 	INIT_LIST_HEAD_RCU(&container->group_list);
- 
-+	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
-+
- 	return container;
- }
- 
-@@ -605,11 +622,90 @@ static long tce_iommu_build(struct tce_container *container,
- 	return ret;
- }
- 
-+static long tce_iommu_create_window(struct tce_container *container,
-+		__u32 page_shift, __u64 window_size, __u32 levels,
-+		__u64 *start_addr)
-+{
-+	struct iommu_table_group *table_group;
-+	struct tce_iommu_group *tcegrp;
-+	int num;
-+	long ret;
-+
-+	num = spapr_tce_find_free_table(container);
-+	if (num < 0)
-+		return -ENOSYS;
-+
-+	tcegrp = list_first_entry(&container->group_list,
-+			struct tce_iommu_group, next);
-+	table_group = iommu_group_get_iommudata(tcegrp->grp);
-+
-+	ret = table_group->ops->create_table(table_group, num,
-+			page_shift, window_size, levels,
-+			&container->tables[num]);
 +	if (ret)
-+		return ret;
-+
-+	list_for_each_entry(tcegrp, &container->group_list, next) {
-+		struct iommu_table_group *table_group_tmp =
-+			iommu_group_get_iommudata(tcegrp->grp);
-+
-+		if (WARN_ON_ONCE(table_group_tmp->ops != table_group->ops))
-+			return -EFAULT;
-+
-+		ret = table_group->ops->set_window(table_group_tmp, num,
-+				&container->tables[num]);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	*start_addr = container->tables[num].it_offset <<
-+		container->tables[num].it_page_shift;
-+
-+	return 0;
-+}
-+
-+static long tce_iommu_remove_window(struct tce_container *container,
-+		__u64 start_addr)
-+{
-+	struct iommu_table_group *table_group = NULL;
-+	struct iommu_table *tbl;
-+	struct tce_iommu_group *tcegrp;
-+	int num;
-+
-+	tbl = spapr_tce_find_table(container, start_addr);
-+	if (!tbl)
-+		return -EINVAL;
-+
-+	/* Detach groups from IOMMUs */
-+	num = tbl - container->tables;
-+	list_for_each_entry(tcegrp, &container->group_list, next) {
-+		table_group = iommu_group_get_iommudata(tcegrp->grp);
-+		if (!table_group->ops || !table_group->ops->unset_window)
-+			return -EFAULT;
-+		if (container->tables[num].it_size)
-+			table_group->ops->unset_window(table_group, num);
-+	}
-+
-+	/* Free table */
-+	tcegrp = list_first_entry(&container->group_list,
-+			struct tce_iommu_group, next);
-+	table_group = iommu_group_get_iommudata(tcegrp->grp);
-+
-+	tce_iommu_clear(container, tbl,
-+			tbl->it_offset, tbl->it_size);
-+	if (tbl->it_ops->free)
-+		tbl->it_ops->free(tbl);
-+
-+	memset(tbl, 0, sizeof(*tbl));
-+
-+	return 0;
++		tce_iommu_clear(container, tbl, entry, i);
++
++	return ret;
 +}
 +
  static long tce_iommu_ioctl(void *iommu_data,
  				 unsigned int cmd, unsigned long arg)
  {
- 	struct tce_container *container = iommu_data;
--	unsigned long minsz;
-+	unsigned long minsz, ddwsz;
- 	long ret;
- 
- 	switch (cmd) {
-@@ -652,6 +748,16 @@ static long tce_iommu_ioctl(void *iommu_data,
- 
- 		info.dma32_window_start = table_group->tce32_start;
- 		info.dma32_window_size = table_group->tce32_size;
-+		info.max_dynamic_windows_supported =
-+				table_group->max_dynamic_windows_supported;
-+		info.levels = table_group->max_levels;
-+		info.flags = table_group->flags;
-+
-+		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info,
-+				levels);
-+
-+		if (info.argsz == ddwsz)
-+			minsz = ddwsz;
- 
- 		if (copy_to_user((void __user *)arg, &info, minsz))
- 			return -EFAULT;
-@@ -823,6 +929,63 @@ static long tce_iommu_ioctl(void *iommu_data,
+@@ -353,6 +509,7 @@ static long tce_iommu_ioctl(void *iommu_data,
+ 	case VFIO_CHECK_EXTENSION:
+ 		switch (arg) {
+ 		case VFIO_SPAPR_TCE_IOMMU:
++		case VFIO_SPAPR_TCE_v2_IOMMU:
+ 			ret = 1;
+ 			break;
+ 		default:
+@@ -440,11 +597,18 @@ static long tce_iommu_ioctl(void *iommu_data,
+ 		if (ret)
+ 			return ret;
+ 
+-		ret = tce_iommu_build(container, tbl,
+-				param.iova >> tbl->it_page_shift,
+-				param.vaddr,
+-				param.size >> tbl->it_page_shift,
+-				direction);
++		if (container->v2)
++			ret = tce_iommu_build_v2(container, tbl,
++					param.iova >> tbl->it_page_shift,
++					param.vaddr,
++					param.size >> tbl->it_page_shift,
++					direction);
++		else
++			ret = tce_iommu_build(container, tbl,
++					param.iova >> tbl->it_page_shift,
++					param.vaddr,
++					param.size >> tbl->it_page_shift,
++					direction);
+ 
+ 		iommu_flush_tce(tbl);
+ 
+@@ -489,7 +653,60 @@ static long tce_iommu_ioctl(void *iommu_data,
+ 
  		return ret;
  	}
- 
-+	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
-+		struct vfio_iommu_spapr_tce_create create;
-+
-+		if (!tce_preregistered(container))
-+			return -EPERM;
-+
-+		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
-+				start_addr);
-+
-+		if (copy_from_user(&create, (void __user *)arg, minsz))
++	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
++		struct vfio_iommu_spapr_register_memory param;
++
++		if (!container->v2)
++			break;
++
++		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
++				size);
++
++		if (copy_from_user(&param, (void __user *)arg, minsz))
 +			return -EFAULT;
 +
-+		if (create.argsz < minsz)
++		if (param.argsz < minsz)
 +			return -EINVAL;
 +
-+		if (create.flags)
++		/* No flag is supported now */
++		if (param.flags)
 +			return -EINVAL;
 +
 +		mutex_lock(&container->lock);
-+
-+		ret = tce_iommu_create_window(container, create.page_shift,
-+				create.window_size, create.levels,
-+				&create.start_addr);
-+
-+		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
-+			return -EFAULT;
-+
++		ret = tce_register_pages(container, param.vaddr, param.size);
 +		mutex_unlock(&container->lock);
 +
 +		return ret;
 +	}
-+	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
-+		struct vfio_iommu_spapr_tce_remove remove;
-+
-+		if (!tce_preregistered(container))
-+			return -EPERM;
-+
-+		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
-+				start_addr);
-+
-+		if (copy_from_user(&remove, (void __user *)arg, minsz))
++	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
++		struct vfio_iommu_spapr_register_memory param;
++
++		if (!container->v2)
++			break;
++
++		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
++				size);
++
++		if (copy_from_user(&param, (void __user *)arg, minsz))
 +			return -EFAULT;
 +
-+		if (remove.argsz < minsz)
++		if (param.argsz < minsz)
 +			return -EINVAL;
 +
-+		if (remove.flags)
++		/* No flag is supported now */
++		if (param.flags)
 +			return -EINVAL;
 +
 +		mutex_lock(&container->lock);
-+
-+		ret = tce_iommu_remove_window(container, remove.start_addr);
-+
++		tce_unregister_pages(container, param.vaddr, param.size);
 +		mutex_unlock(&container->lock);
 +
-+		return ret;
++		return 0;
 +	}
- 	}
- 
- 	return -ENOTTY;
+ 	case VFIO_IOMMU_ENABLE:
++		if (container->v2)
++			break;
++
+ 		mutex_lock(&container->lock);
+ 		ret = tce_iommu_enable(container);
+ 		mutex_unlock(&container->lock);
+@@ -497,6 +714,9 @@ static long tce_iommu_ioctl(void *iommu_data,
+ 
+ 
+ 	case VFIO_IOMMU_DISABLE:
++		if (container->v2)
++			break;
++
+ 		mutex_lock(&container->lock);
+ 		tce_iommu_disable(container);
+ 		mutex_unlock(&container->lock);
 diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
-index fbc5286..150f418 100644
+index 82889c3..fbc5286 100644
 --- a/include/uapi/linux/vfio.h
 +++ b/include/uapi/linux/vfio.h
-@@ -457,9 +457,11 @@ struct vfio_iommu_type1_dma_unmap {
-  */
- struct vfio_iommu_spapr_tce_info {
- 	__u32 argsz;
--	__u32 flags;			/* reserved for future use */
-+	__u32 flags;
- 	__u32 dma32_window_start;	/* 32 bit window start (bytes) */
- 	__u32 dma32_window_size;	/* 32 bit window size (bytes) */
-+	__u32 max_dynamic_windows_supported;
-+	__u32 levels;
- };
- 
- #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
-@@ -520,6 +522,26 @@ struct vfio_iommu_spapr_register_memory {
-  */
- #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 18)
- 
-+struct vfio_iommu_spapr_tce_create {
-+	__u32 argsz;
-+	__u32 flags;
-+	/* in */
-+	__u32 page_shift;
-+	__u64 window_size;
-+	__u32 levels;
-+	/* out */
-+	__u64 start_addr;
+@@ -36,6 +36,8 @@
+ /* Two-stage IOMMU */
+ #define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
+ 
++#define VFIO_SPAPR_TCE_v2_IOMMU		7
++
+ /*
+  * The IOCTL interface is designed for extensibility by embedding the
+  * structure length (argsz) and flags into structures passed between
+@@ -493,6 +495,31 @@ struct vfio_eeh_pe_op {
+ 
+ #define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
+ 
++/**
++ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory)
++ *
++ * Registers user space memory where DMA is allowed. It pins
++ * user pages and does the locked memory accounting so
++ * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
++ * get faster.
++ */
++struct vfio_iommu_spapr_register_memory {
++	__u32	argsz;
++	__u32	flags;
++	__u64	vaddr;				/* Process virtual address */
++	__u64	size;				/* Size of mapping (bytes) */
 +};
-+#define VFIO_IOMMU_SPAPR_TCE_CREATE	_IO(VFIO_TYPE, VFIO_BASE + 19)
-+
-+struct vfio_iommu_spapr_tce_remove {
-+	__u32 argsz;
-+	__u32 flags;
-+	/* in */
-+	__u64 start_addr;
-+};
-+#define VFIO_IOMMU_SPAPR_TCE_REMOVE	_IO(VFIO_TYPE, VFIO_BASE + 20)
++#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 17)
++
++/**
++ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory)
++ *
++ * Unregisters user space memory registered with
++ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
++ * Uses vfio_iommu_spapr_register_memory for parameters.
++ */
++#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 18)
 +
  /* ***************************************************************** */
  
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help