[PATCH v3 1/6] iommu/core: split mapping to page sizes as supported by the hardware
From: Roedel, Joerg <hidden>
Date: 2011-10-10 09:49:09
Also in:
kvm, linux-omap, lkml
Hi Ohad, sorry, I was on vacation last week and had no time to look into this. On Sun, Oct 02, 2011 at 11:58:12AM -0400, Ohad Ben-Cohen wrote:
quoted hunk ↗ jump to hunk
drivers/iommu/iommu.c | 138 ++++++++++++++++++++++++++++++++++++++++--- drivers/iommu/omap-iovmm.c | 12 +--- include/linux/iommu.h | 6 +- virt/kvm/iommu.c | 4 +- 4 files changed, 137 insertions(+), 23 deletions(-)diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a7b0862..f23563f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c@@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include <linux/kernel.h> #include <linux/bug.h> #include <linux/types.h>@@ -23,15 +25,54 @@ #include <linux/slab.h> #include <linux/errno.h> #include <linux/iommu.h> +#include <linux/bitmap.h>
Is this still required?
static struct iommu_ops *iommu_ops;
+/* bitmap of supported page sizes */
+static unsigned long iommu_pgsize_bitmap;
+
+/* size of the smallest supported page (in bytes) */
+static unsigned int iommu_min_pagesz;
+
+/**
+ * register_iommu() - register an IOMMU hardware
+ * @ops: iommu handlers
+ * @pgsize_bitmap: bitmap of page sizes supported by the hardware
+ *
+ * Note: this is a temporary function, which will be removed once
+ * all IOMMU drivers are converted. The only reason it exists is to
+ * allow splitting the pgsizes changes to several patches in order to ease
+ * the review.
+ */
+void register_iommu_pgsize(struct iommu_ops *ops, unsigned long pgsize_bitmap)
+{
+ if (iommu_ops || iommu_pgsize_bitmap || !pgsize_bitmap)
+ BUG();
+
+ iommu_ops = ops;
+ iommu_pgsize_bitmap = pgsize_bitmap;
+
+ /* find out the minimum page size only once */
+ iommu_min_pagesz = 1 << __ffs(pgsize_bitmap);
+}Hmm, I thought a little bit about that and came to the conculusion it might be best to just keep the page-sizes as a part of the iommu_ops structure. So there is no need to extend the register_iommu interface. Also, the bus_set_iommu interface is now in the -next branch. Would be good if you rebase the patches to that interface. You can find the current iommu tree with these changes at git://git.8bytes.org/scm/iommu.git
quoted hunk ↗ jump to hunk
@@ -115,26 +156,103 @@ int iommu_domain_has_cap(struct iommu_domain *domain, EXPORT_SYMBOL_GPL(iommu_domain_has_cap); int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot) + phys_addr_t paddr, size_t size, int prot) { - size_t size; + int ret = 0; + + /* + * both the virtual address and the physical one, as well as + * the size of the mapping, must be aligned (at least) to the + * size of the smallest page supported by the hardware + */ + if (!IS_ALIGNED(iova | paddr | size, iommu_min_pagesz)) { + pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz " + "0x%x\n", iova, (unsigned long)paddr, + (unsigned long)size, iommu_min_pagesz); + return -EINVAL; + } - size = 0x1000UL << gfp_order; + pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova, + (unsigned long)paddr, (unsigned long)size); - BUG_ON(!IS_ALIGNED(iova | paddr, size)); + while (size) { + unsigned long pgsize, addr_merge = iova | paddr; + unsigned int pgsize_idx; - return iommu_ops->map(domain, iova, paddr, gfp_order, prot); + /* Max page size that still fits into 'size' */ + pgsize_idx = __fls(size); + + /* need to consider alignment requirements ? */ + if (likely(addr_merge)) { + /* Max page size allowed by both iova and paddr */ + unsigned int align_pgsize_idx = __ffs(addr_merge); + + pgsize_idx = min(pgsize_idx, align_pgsize_idx); + } + + /* build a mask of acceptable page sizes */ + pgsize = (1UL << (pgsize_idx + 1)) - 1; + + /* throw away page sizes not supported by the hardware */ + pgsize &= iommu_pgsize_bitmap;
I think we need some care here and check pgsize for 0. A BUG_ON should do.
+
+ /* pick the biggest page */
+ pgsize_idx = __fls(pgsize);
+ pgsize = 1UL << pgsize_idx;
+
+ /* convert index to page order */
+ pgsize_idx -= PAGE_SHIFT;
+
+ pr_debug("mapping: iova 0x%lx pa 0x%lx order %u\n", iova,
+ (unsigned long)paddr, pgsize_idx);
+
+ ret = iommu_ops->map(domain, iova, paddr, pgsize_idx, prot);
+ if (ret)
+ break;
+
+ iova += pgsize;
+ paddr += pgsize;
+ size -= pgsize;
+ }
+
+ return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);Otherwise this looks much better, thanks.
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+int iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
- size_t size;
+ int order, unmapped_size, unmapped_order, total_unmapped = 0;
+
+ /*
+ * The virtual address, as well as the size of the mapping, must be
+ * aligned (at least) to the size of the smallest page supported
+ * by the hardware
+ */
+ if (!IS_ALIGNED(iova | size, iommu_min_pagesz)) {
+ pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
+ iova, (unsigned long)size, iommu_min_pagesz);
+ return -EINVAL;
+ }
+
+ pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
+ (unsigned long)size);
+
+ while (size > total_unmapped) {
+ order = get_order(size - total_unmapped);
+
+ unmapped_order = iommu_ops->unmap(domain, iova, order);I think we should make sure that we call iommu_ops->unmap with the same parameters as iommu_ops->map. Otherwise we still need some page-size complexity in the iommu-drivers.
+ if (unmapped_order < 0)
+ return unmapped_order;
+
+ pr_debug("unmapped: iova 0x%lx order %d\n", iova,
+ unmapped_order);
- size = 0x1000UL << gfp_order;
+ unmapped_size = 0x1000UL << unmapped_order;
- BUG_ON(!IS_ALIGNED(iova, size));
+ iova += unmapped_size;
+ total_unmapped += unmapped_size;
+ }
- return iommu_ops->unmap(domain, iova, gfp_order);
+ return get_order(total_unmapped);
}When we pass the size now it makes sense to also return the unmapped-size instead of the order. Also the semantics of iommu_unmap need some more discussion I think. We should require that iommu_map/iommu_unmap are called with the same parameters for the same range. Otherwise we can give no guarantees what a unmap-call will unmap. Joerg -- AMD Operating System Research Center Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach General Managers: Alberto Bozzo, Andrew Bowd Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632