Inter-revision diff: patch 25

Comparing v8 (message) to v9 (message)

--- v8
+++ v9
@@ -4,96 +4,105 @@
 
 This introduces a new IOMMU ownership flavour when the user can not
 just control the existing IOMMU table but remove/create tables on demand.
-If an IOMMU implements a set_ownership() callback, this lets the user have
-full control over the IOMMU group. When the ownership is taken,
+If an IOMMU implements take/release_ownership() callbacks, this lets
+the user have full control over the IOMMU group. When the ownership is taken,
 the platform code removes all the windows so the caller must create them.
 Before returning the ownership back to the platform code, VFIO
 unprograms and removes all the tables it created.
 
+This changes IODA2's onwership handler to remove the existing table
+rather than manipulating with the existing one. From now on,
+iommu_take_ownership() and iommu_release_ownership() are only called
+from the vfio_iommu_spapr_tce driver.
+
+In tce_iommu_detach_group(), this copies a iommu_table descriptor on stack
+as IODA2's unset_window() will clear the descriptor embedded into PE
+and we will not be able to free the table afterwards.
+This is a transitional hack and following patches will replace this code
+anyway.
+
 Old-style ownership is still supported allowing VFIO to run on older
 P5IOC2 and IODA IO controllers.
 
+No change in userspace-visible behaviour is expected. Since it recreates
+TCE tables on each ownership change, related kernel traces will appear
+more often.
+
 Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+[aw: for the vfio related changes]
+Acked-by: Alex Williamson <alex.williamson@redhat.com>
 ---
 Changes:
+v9:
+* fixed crash in tce_iommu_detach_group() on tbl->it_ops->free as
+tce_iommu_attach_group() used to initialize the table from a descriptor
+on stack (it does not matter for the series as this bit is changed later anyway
+but it ruing bisectability)
+
 v6:
 * fixed commit log that VFIO removes tables before passing ownership
 back to the platform code, not userspace
+
+1
 ---
- arch/powerpc/platforms/powernv/pci-ioda.c | 30 +++++++++++++++---
- drivers/vfio/vfio_iommu_spapr_tce.c       | 51 ++++++++++++++++++++++++-------
- 2 files changed, 66 insertions(+), 15 deletions(-)
+ arch/powerpc/platforms/powernv/pci-ioda.c | 27 +++++++++++++++++++++++--
+ drivers/vfio/vfio_iommu_spapr_tce.c       | 33 +++++++++++++++++++++++++++++--
+ 2 files changed, 56 insertions(+), 4 deletions(-)
 
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index ab0cfb7..751aeab 100644
+index 2a4b2b2..45bc131 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -1626,11 +1626,33 @@ static void pnv_ioda2_set_ownership(struct iommu_table_group *table_group,
+@@ -2105,16 +2105,39 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+ 	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ 						table_group);
+ 
+-	iommu_take_ownership(&table_group->tables[0]);
+ 	pnv_pci_ioda2_set_bypass(pe, false);
++	pnv_pci_ioda2_unset_window(&pe->table_group, 0);
++	pnv_pci_free_table(&pe->table_group.tables[0]);
+ }
+ 
+ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
  {
  	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
  						table_group);
--	if (enable)
--		iommu_take_ownership(table_group);
--	else
--		iommu_release_ownership(table_group);
-+	if (enable) {
-+		pnv_pci_ioda2_unset_window(&pe->table_group, 0);
-+		pnv_pci_free_table(&pe->table_group.tables[0]);
-+	} else {
-+		struct iommu_table *tbl = &pe->table_group.tables[0];
-+		int64_t rc;
++	struct iommu_table *tbl = &pe->table_group.tables[0];
++	int64_t rc;
++
++	rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
++			IOMMU_PAGE_SHIFT_4K,
++			pe->phb->ioda.m32_pci_base,
++			POWERNV_IOMMU_DEFAULT_LEVELS, tbl);
++	if (rc) {
++		pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
++				rc);
++		return;
++	}
++
++	tbl->it_table_group = &pe->table_group;
++	iommu_init_table(tbl, pe->phb->hose->node);
++
++	rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
++	if (rc) {
++		pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
++				rc);
++		pnv_pci_free_table(tbl);
++		return;
++	}
  
-+		rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
-+				IOMMU_PAGE_SHIFT_4K,
-+				pe->phb->ioda.m32_pci_base,
-+				POWERNV_IOMMU_DEFAULT_LEVELS, tbl);
-+		if (rc) {
-+			pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
-+					rc);
-+			return;
-+		}
-+
-+		iommu_init_table(tbl, pe->phb->hose->node);
-+
-+		rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
-+		if (rc) {
-+			pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
-+					rc);
-+			pnv_pci_free_table(tbl);
-+			return;
-+		}
-+	}
- 	pnv_pci_ioda2_set_bypass(pe, !enable);
+-	iommu_release_ownership(&table_group->tables[0]);
+ 	pnv_pci_ioda2_set_bypass(pe, true);
  }
  
 diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 7c3c215..9aeaed6 100644
+index 2d51bbf..892a584 100644
 --- a/drivers/vfio/vfio_iommu_spapr_tce.c
 +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
-@@ -226,18 +226,11 @@ static int tce_iommu_clear(struct tce_container *container,
- static void tce_iommu_release(void *iommu_data)
- {
- 	struct tce_container *container = iommu_data;
--	struct iommu_table *tbl;
--	struct iommu_table_group *table_group;
- 
- 	WARN_ON(container->grp);
- 
--	if (container->grp) {
--		table_group = iommu_group_get_iommudata(container->grp);
--		tbl = &table_group->tables[0];
--		tce_iommu_clear(container, tbl,	tbl->it_offset, tbl->it_size);
--
-+	if (container->grp)
- 		tce_iommu_detach_group(iommu_data, container->grp);
--	}
- 
- 	tce_iommu_disable(container);
- 
-@@ -553,14 +546,24 @@ static int tce_iommu_attach_group(void *iommu_data,
- 
- 	if (!table_group->ops || !table_group->ops->set_ownership) {
- 		ret = iommu_take_ownership(table_group);
+@@ -569,6 +569,10 @@ static int tce_iommu_attach_group(void *iommu_data,
+ 	if (!table_group->ops || !table_group->ops->take_ownership ||
+ 			!table_group->ops->release_ownership) {
+ 		ret = tce_iommu_take_ownership(table_group);
 +	} else if (!table_group->ops->create_table ||
 +			!table_group->ops->set_window) {
 +		WARN_ON_ONCE(1);
@@ -101,22 +110,24 @@
  	} else {
  		/*
  		 * Disable iommu bypass, otherwise the user can DMA to all of
- 		 * our physical memory via the bypass window instead of just
+@@ -576,7 +580,15 @@ static int tce_iommu_attach_group(void *iommu_data,
  		 * the pages that has been explicitly mapped into the iommu
  		 */
-+		struct iommu_table tbltmp = { 0 }, *tbl = &tbltmp;
-+
- 		table_group->ops->set_ownership(table_group, true);
+ 		table_group->ops->take_ownership(table_group);
 -		ret = 0;
-+		ret = table_group->ops->create_table(table_group, 0,
++		ret = table_group->ops->create_table(table_group,
++				0, /* window number */
 +				IOMMU_PAGE_SHIFT_4K,
-+				table_group->tce32_size, 1, tbl);
++				table_group->tce32_size,
++				1, /* default levels */
++				&table_group->tables[0]);
 +		if (!ret)
-+			ret = table_group->ops->set_window(table_group, 0, tbl);
++			ret = table_group->ops->set_window(table_group, 0,
++					&table_group->tables[0]);
  	}
  
  	if (ret)
-@@ -579,6 +582,7 @@ static void tce_iommu_detach_group(void *iommu_data,
+@@ -595,6 +607,7 @@ static void tce_iommu_detach_group(void *iommu_data,
  {
  	struct tce_container *container = iommu_data;
  	struct iommu_table_group *table_group;
@@ -124,40 +135,28 @@
  
  	mutex_lock(&container->lock);
  	if (iommu_group != container->grp) {
-@@ -602,10 +606,35 @@ static void tce_iommu_detach_group(void *iommu_data,
- 	BUG_ON(!table_group);
- 
+@@ -620,8 +633,24 @@ static void tce_iommu_detach_group(void *iommu_data,
  	/* Kernel owns the device now, we can restore bypass */
--	if (!table_group->ops || !table_group->ops->set_ownership)
-+	if (!table_group->ops || !table_group->ops->set_ownership) {
+ 	if (!table_group->ops || !table_group->ops->release_ownership)
+ 		tce_iommu_release_ownership(container, table_group);
+-	else
++	else if (!table_group->ops->unset_window)
++		WARN_ON_ONCE(1);
++	else {
 +		for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-+			struct iommu_table *tbl = &table_group->tables[i];
++			struct iommu_table tbl = table_group->tables[i];
 +
-+			if (!tbl->it_size)
++			if (!tbl.it_size)
 +				continue;
 +
-+			if (!tbl->it_ops)
-+				goto unlock_exit;
-+			tce_iommu_clear(container, tbl,
-+					tbl->it_offset, tbl->it_size);
-+		}
- 		iommu_release_ownership(table_group);
--	else
-+	} else if (!table_group->ops->unset_window) {
-+		WARN_ON_ONCE(1);
-+	} else {
-+		for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-+			struct iommu_table *tbl = &table_group->tables[i];
-+
 +			table_group->ops->unset_window(table_group, i);
-+			tce_iommu_clear(container, tbl,
-+					tbl->it_offset, tbl->it_size);
-+
-+			if (tbl->it_ops->free)
-+				tbl->it_ops->free(tbl);
++			tce_iommu_clear(container, &tbl,
++					tbl.it_offset, tbl.it_size);
++			if (tbl.it_ops->free)
++				tbl.it_ops->free(&tbl);
 +		}
 +
- 		table_group->ops->set_ownership(table_group, false);
+ 		table_group->ops->release_ownership(table_group);
 +	}
  
  unlock_exit:
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help