Inter-revision diff: patch 33

Comparing v11 (message) to v10 (message)

--- v11
+++ v10
@@ -48,11 +48,12 @@
 [aw: for the vfio related changes]
 Acked-by: Alex Williamson <alex.williamson@redhat.com>
 ---
+
+Alex, should I remove your "acked-by" in the cases like this and
+get another one?
+
+---
 Changes:
-v11:
-* mm_iommu_put() does not return a code so this does not check it
-* moved "v2" in tce_container to pack the struct
-
 v10:
 * moved it_userspace allocation to vfio_iommu_spapr_tce as it VFIO
 specific thing
@@ -88,9 +89,9 @@
 ---
  Documentation/vfio.txt              |  31 ++-
  arch/powerpc/include/asm/iommu.h    |   6 +
- drivers/vfio/vfio_iommu_spapr_tce.c | 512 ++++++++++++++++++++++++++++++------
+ drivers/vfio/vfio_iommu_spapr_tce.c | 516 ++++++++++++++++++++++++++++++------
  include/uapi/linux/vfio.h           |  27 ++
- 4 files changed, 487 insertions(+), 89 deletions(-)
+ 4 files changed, 494 insertions(+), 86 deletions(-)
 
 diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
 index 96978ec..7dcf2b5 100644
@@ -143,14 +144,15 @@
  
  [1] VFIO was originally an acronym for "Virtual Function I/O" in its
 diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
-index 9d37492..f9957eb 100644
+index c8bad21..763c041 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
-@@ -112,9 +112,15 @@ struct iommu_table {
- 	unsigned long *it_map;       /* A simple allocation bitmap for now */
+@@ -113,10 +113,16 @@ struct iommu_table {
  	unsigned long  it_page_shift;/* table iommu page size */
+ #ifdef CONFIG_IOMMU_API
  	struct list_head it_group_list;/* List of iommu_table_group_link */
 +	unsigned long *it_userspace; /* userspace view of the table */
+ #endif
  	struct iommu_table_ops *it_ops;
  };
  
@@ -163,7 +165,7 @@
  static inline __attribute_const__
  int get_iommu_order(unsigned long size, struct iommu_table *tbl)
 diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 7a84110..cadd9f8 100644
+index 8943b29..e7e8db3 100644
 --- a/drivers/vfio/vfio_iommu_spapr_tce.c
 +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
 @@ -19,8 +19,10 @@
@@ -189,14 +191,14 @@
  /*
   * The container descriptor supports only a single group per container.
   * Required by the API as the container is not supplied with the IOMMU group
-@@ -88,11 +95,84 @@ static void decrement_locked_vm(long npages)
+@@ -88,11 +95,98 @@ static void decrement_locked_vm(long npages)
   */
  struct tce_container {
  	struct mutex lock;
 -	struct iommu_group *grp;
  	bool enabled;
+ 	unsigned long locked_pages;
 +	bool v2;
- 	unsigned long locked_pages;
 +	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
 +	struct list_head group_list;
  };
@@ -204,32 +206,46 @@
 +static long tce_iommu_unregister_pages(struct tce_container *container,
 +		__u64 vaddr, __u64 size)
 +{
++	long ret;
 +	struct mm_iommu_table_group_mem_t *mem;
 +
 +	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
 +		return -EINVAL;
 +
-+	mem = mm_iommu_lookup(vaddr, size >> PAGE_SHIFT);
++	mem = mm_iommu_get(vaddr, size >> PAGE_SHIFT);
 +	if (!mem)
 +		return -EINVAL;
 +
-+	return mm_iommu_put(mem);
++	ret = mm_iommu_put(mem); /* undo kref_get() from mm_iommu_get() */
++	if (!ret)
++		ret = mm_iommu_put(mem);
++
++	return ret;
 +}
 +
 +static long tce_iommu_register_pages(struct tce_container *container,
 +		__u64 vaddr, __u64 size)
 +{
 +	long ret = 0;
-+	struct mm_iommu_table_group_mem_t *mem = NULL;
++	struct mm_iommu_table_group_mem_t *mem;
 +	unsigned long entries = size >> PAGE_SHIFT;
 +
 +	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
 +			((vaddr + size) < vaddr))
 +		return -EINVAL;
 +
-+	ret = mm_iommu_get(vaddr, entries, &mem);
-+	if (ret)
-+		return ret;
++	mem = mm_iommu_get(vaddr, entries);
++	if (!mem) {
++		ret = try_increment_locked_vm(entries);
++		if (ret)
++			return ret;
++
++		ret = mm_iommu_alloc(vaddr, entries, &mem);
++		if (ret) {
++			decrement_locked_vm(entries);
++			return ret;
++		}
++	}
 +
 +	container->enabled = true;
 +
@@ -275,7 +291,7 @@
  static bool tce_page_is_contained(struct page *page, unsigned page_shift)
  {
  	/*
-@@ -103,18 +183,18 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+@@ -103,18 +197,18 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
  	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
  }
  
@@ -300,7 +316,7 @@
  
  		if (tbl) {
  			unsigned long entry = ioba >> tbl->it_page_shift;
-@@ -136,9 +216,7 @@ static int tce_iommu_enable(struct tce_container *container)
+@@ -136,9 +230,7 @@ static int tce_iommu_enable(struct tce_container *container)
  	int ret = 0;
  	unsigned long locked;
  	struct iommu_table_group *table_group;
@@ -311,7 +327,7 @@
  
  	if (!current->mm)
  		return -ESRCH; /* process exited */
-@@ -175,7 +253,12 @@ static int tce_iommu_enable(struct tce_container *container)
+@@ -175,7 +267,12 @@ static int tce_iommu_enable(struct tce_container *container)
  	 * as there is no way to know how much we should increment
  	 * the locked_vm counter.
  	 */
@@ -325,7 +341,7 @@
  	if (!table_group)
  		return -ENODEV;
  
-@@ -211,7 +294,7 @@ static void *tce_iommu_open(unsigned long arg)
+@@ -211,7 +308,7 @@ static void *tce_iommu_open(unsigned long arg)
  {
  	struct tce_container *container;
  
@@ -334,7 +350,7 @@
  		pr_err("tce_vfio: Wrong IOMMU type\n");
  		return ERR_PTR(-EINVAL);
  	}
-@@ -221,18 +304,45 @@ static void *tce_iommu_open(unsigned long arg)
+@@ -221,18 +318,45 @@ static void *tce_iommu_open(unsigned long arg)
  		return ERR_PTR(-ENOMEM);
  
  	mutex_init(&container->lock);
@@ -383,7 +399,7 @@
  
  	tce_iommu_disable(container);
  	mutex_destroy(&container->lock);
-@@ -249,6 +359,47 @@ static void tce_iommu_unuse_page(struct tce_container *container,
+@@ -249,6 +373,47 @@ static void tce_iommu_unuse_page(struct tce_container *container,
  	put_page(page);
  }
  
@@ -431,7 +447,7 @@
  static int tce_iommu_clear(struct tce_container *container,
  		struct iommu_table *tbl,
  		unsigned long entry, unsigned long pages)
-@@ -267,6 +418,11 @@ static int tce_iommu_clear(struct tce_container *container,
+@@ -267,6 +432,11 @@ static int tce_iommu_clear(struct tce_container *container,
  		if (direction == DMA_NONE)
  			continue;
  
@@ -443,7 +459,7 @@
  		tce_iommu_unuse_page(container, oldhpa);
  	}
  
-@@ -333,6 +489,64 @@ static long tce_iommu_build(struct tce_container *container,
+@@ -333,6 +503,64 @@ static long tce_iommu_build(struct tce_container *container,
  	return ret;
  }
  
@@ -508,7 +524,7 @@
  static long tce_iommu_create_table(struct tce_container *container,
  			struct iommu_table_group *table_group,
  			int num,
-@@ -358,6 +572,12 @@ static long tce_iommu_create_table(struct tce_container *container,
+@@ -358,6 +586,12 @@ static long tce_iommu_create_table(struct tce_container *container,
  	WARN_ON(!ret && !(*ptbl)->it_ops->free);
  	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
  
@@ -521,7 +537,7 @@
  	if (ret)
  		decrement_locked_vm(table_size >> PAGE_SHIFT);
  
-@@ -368,6 +588,7 @@ static void tce_iommu_free_table(struct iommu_table *tbl)
+@@ -368,6 +602,7 @@ static void tce_iommu_free_table(struct iommu_table *tbl)
  {
  	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
  
@@ -529,7 +545,7 @@
  	tbl->it_ops->free(tbl);
  	decrement_locked_vm(pages);
  }
-@@ -383,6 +604,7 @@ static long tce_iommu_ioctl(void *iommu_data,
+@@ -383,6 +618,7 @@ static long tce_iommu_ioctl(void *iommu_data,
  	case VFIO_CHECK_EXTENSION:
  		switch (arg) {
  		case VFIO_SPAPR_TCE_IOMMU:
@@ -537,7 +553,7 @@
  			ret = 1;
  			break;
  		default:
-@@ -394,12 +616,15 @@ static long tce_iommu_ioctl(void *iommu_data,
+@@ -394,12 +630,15 @@ static long tce_iommu_ioctl(void *iommu_data,
  
  	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
  		struct vfio_iommu_spapr_tce_info info;
@@ -555,7 +571,7 @@
  
  		if (!table_group)
  			return -ENXIO;
-@@ -468,11 +693,18 @@ static long tce_iommu_ioctl(void *iommu_data,
+@@ -467,11 +706,18 @@ static long tce_iommu_ioctl(void *iommu_data,
  		if (ret)
  			return ret;
  
@@ -579,7 +595,7 @@
  
  		iommu_flush_tce(tbl);
  
-@@ -518,7 +750,61 @@ static long tce_iommu_ioctl(void *iommu_data,
+@@ -517,7 +763,61 @@ static long tce_iommu_ioctl(void *iommu_data,
  
  		return ret;
  	}
@@ -629,10 +645,10 @@
 +			return -EINVAL;
 +
 +		mutex_lock(&container->lock);
-+		ret = tce_iommu_unregister_pages(container, param.vaddr, param.size);
++		tce_iommu_unregister_pages(container, param.vaddr, param.size);
 +		mutex_unlock(&container->lock);
 +
-+		return ret;
++		return 0;
 +	}
  	case VFIO_IOMMU_ENABLE:
 +		if (container->v2)
@@ -641,7 +657,7 @@
  		mutex_lock(&container->lock);
  		ret = tce_iommu_enable(container);
  		mutex_unlock(&container->lock);
-@@ -526,16 +812,27 @@ static long tce_iommu_ioctl(void *iommu_data,
+@@ -525,16 +825,27 @@ static long tce_iommu_ioctl(void *iommu_data,
  
  
  	case VFIO_IOMMU_DISABLE:
@@ -674,7 +690,7 @@
  	}
  
  	return -ENOTTY;
-@@ -547,14 +844,17 @@ static void tce_iommu_release_ownership(struct tce_container *container,
+@@ -546,14 +857,17 @@ static void tce_iommu_release_ownership(struct tce_container *container,
  	int i;
  
  	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
@@ -693,7 +709,7 @@
  	}
  }
  
-@@ -569,7 +869,10 @@ static int tce_iommu_take_ownership(struct tce_container *container,
+@@ -568,7 +882,10 @@ static int tce_iommu_take_ownership(struct tce_container *container,
  		if (!tbl || !tbl->it_map)
  			continue;
  
@@ -705,7 +721,7 @@
  		if (rc) {
  			for (j = 0; j < i; ++j)
  				iommu_release_ownership(
-@@ -579,6 +882,9 @@ static int tce_iommu_take_ownership(struct tce_container *container,
+@@ -578,38 +895,57 @@ static int tce_iommu_take_ownership(struct tce_container *container,
  		}
  	}
  
@@ -715,99 +731,15 @@
  	return 0;
  }
  
-@@ -592,18 +898,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
- 		return;
- 	}
- 
--	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
--		/* Store table pointer as unset_window resets it */
--		struct iommu_table *tbl = table_group->tables[i];
--
--		if (!tbl)
--			continue;
--
-+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
- 		table_group->ops->unset_window(table_group, i);
--		tce_iommu_clear(container, tbl,
--				tbl->it_offset, tbl->it_size);
--		tce_iommu_free_table(tbl);
--	}
- 
- 	table_group->ops->release_ownership(table_group);
- }
-@@ -611,7 +907,7 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
- static long tce_iommu_take_ownership_ddw(struct tce_container *container,
- 		struct iommu_table_group *table_group)
+ static int tce_iommu_attach_group(void *iommu_data,
+ 		struct iommu_group *iommu_group)
  {
--	long ret;
-+	long i, ret = 0;
- 	struct iommu_table *tbl = NULL;
- 
- 	if (!table_group->ops->create_table || !table_group->ops->set_window ||
-@@ -622,23 +918,45 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
- 
- 	table_group->ops->take_ownership(table_group);
- 
--	ret = tce_iommu_create_table(container,
--			table_group,
--			0, /* window number */
--			IOMMU_PAGE_SHIFT_4K,
--			table_group->tce32_size,
--			1, /* default levels */
--			&tbl);
--	if (!ret) {
--		ret = table_group->ops->set_window(table_group, 0, tbl);
-+	/*
-+	 * If it the first group attached, check if there is
-+	 * a default DMA window and create one if none as
-+	 * the userspace expects it to exist.
-+	 */
-+	if (!tce_groups_attached(container) && !container->tables[0]) {
-+		ret = tce_iommu_create_table(container,
-+				table_group,
-+				0, /* window number */
-+				IOMMU_PAGE_SHIFT_4K,
-+				table_group->tce32_size,
-+				1, /* default levels */
-+				&tbl);
- 		if (ret)
--			tce_iommu_free_table(tbl);
-+			goto release_exit;
- 		else
--			table_group->tables[0] = tbl;
-+			container->tables[0] = tbl;
- 	}
- 
--	if (ret)
--		table_group->ops->release_ownership(table_group);
-+	/* Set all windows to the new group */
-+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-+		tbl = container->tables[i];
-+
-+		if (!tbl)
-+			continue;
-+
-+		/* Set the default window to a new group */
-+		ret = table_group->ops->set_window(table_group, i, tbl);
-+		if (ret)
-+			goto release_exit;
-+	}
-+
-+	return 0;
-+
-+release_exit:
-+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
-+		table_group->ops->unset_window(table_group, i);
-+
-+	table_group->ops->release_ownership(table_group);
- 
- 	return ret;
- }
-@@ -649,29 +967,44 @@ static int tce_iommu_attach_group(void *iommu_data,
- 	int ret;
+-	int ret;
++	int ret, i;
  	struct tce_container *container = iommu_data;
  	struct iommu_table_group *table_group;
 +	struct tce_iommu_group *tcegrp = NULL;
++	bool first_group = !tce_groups_attached(container);
  
  	mutex_lock(&container->lock);
  
@@ -832,7 +764,7 @@
 -	if (!table_group) {
 -		ret = -ENXIO;
 +
-+	if (tce_groups_attached(container) && (!table_group->ops ||
++	if (!first_group && (!table_group->ops ||
 +			!table_group->ops->take_ownership ||
 +			!table_group->ops->release_ownership)) {
 +		ret = -EBUSY;
@@ -865,16 +797,61 @@
  		goto unlock_exit;
  	}
  
-@@ -681,10 +1014,15 @@ static int tce_iommu_attach_group(void *iommu_data,
- 	else
- 		ret = tce_iommu_take_ownership_ddw(container, table_group);
- 
--	if (!ret)
--		container->grp = iommu_group;
-+	if (!ret) {
-+		tcegrp->grp = iommu_group;
-+		list_add(&tcegrp->next, &container->group_list);
-+	}
+@@ -628,28 +964,50 @@ static int tce_iommu_attach_group(void *iommu_data,
+ 		 * the pages that has been explicitly mapped into the iommu
+ 		 */
+ 		table_group->ops->take_ownership(table_group);
+-		ret = tce_iommu_create_table(container,
+-				table_group,
+-				0, /* window number */
+-				IOMMU_PAGE_SHIFT_4K,
+-				table_group->tce32_size,
+-				1, /* default levels */
+-				&tbl);
+-		if (!ret) {
+-			ret = table_group->ops->set_window(table_group, 0, tbl);
++
++		/*
++		 * If it the first group attached, check if there is
++		 * a default DMA window and create one if none as
++		 * the userspace expects it to exist.
++		 */
++		if (first_group && !container->tables[0]) {
++			ret = tce_iommu_create_table(container,
++					table_group,
++					0, /* window number */
++					IOMMU_PAGE_SHIFT_4K,
++					table_group->tce32_size,
++					1, /* default levels */
++					&tbl);
+ 			if (ret)
+-				tce_iommu_free_table(tbl);
++				goto unlock_exit;
+ 			else
+-				table_group->tables[0] = tbl;
++				container->tables[0] = tbl;
++		}
++
++		/* Set all windows to the new group */
++		for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
++			tbl = container->tables[i];
++
++			if (!tbl)
++				continue;
++
++			/* Set the default window to a new group */
++			ret = table_group->ops->set_window(table_group, i, tbl);
++			if (ret)
++				break;
+ 		}
+ 	}
+ 
+ 	if (ret)
+ 		goto unlock_exit;
+ 
+-	container->grp = iommu_group;
++	tcegrp->grp = iommu_group;
++	list_add(&tcegrp->next, &container->group_list);
  
  unlock_exit:
 +	if (ret && tcegrp)
@@ -883,12 +860,13 @@
  	mutex_unlock(&container->lock);
  
  	return ret;
-@@ -695,24 +1033,26 @@ static void tce_iommu_detach_group(void *iommu_data,
+@@ -660,25 +1018,27 @@ static void tce_iommu_detach_group(void *iommu_data,
  {
  	struct tce_container *container = iommu_data;
  	struct iommu_table_group *table_group;
++	struct tce_iommu_group *tcegrp;
+ 	long i;
 +	bool found = false;
-+	struct tce_iommu_group *tcegrp;
  
  	mutex_lock(&container->lock);
 -	if (iommu_group != container->grp) {
@@ -923,6 +901,26 @@
  
  	table_group = iommu_group_get_iommudata(iommu_group);
  	BUG_ON(!table_group);
+@@ -689,18 +1049,8 @@ static void tce_iommu_detach_group(void *iommu_data,
+ 	else if (!table_group->ops->unset_window)
+ 		WARN_ON_ONCE(1);
+ 	else {
+-		for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+-			/* Store table pointer as unset_window resets it */
+-			struct iommu_table *tbl = table_group->tables[i];
+-
+-			if (!tbl)
+-				continue;
+-
++		for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+ 			table_group->ops->unset_window(table_group, i);
+-			tce_iommu_clear(container, tbl,
+-					tbl->it_offset, tbl->it_size);
+-			tce_iommu_free_table(tbl);
+-		}
+ 
+ 		table_group->ops->release_ownership(table_group);
+ 	}
 diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
 index b57b750..8fdcfb9 100644
 --- a/include/uapi/linux/vfio.h
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help