--- v6
+++ v9
@@ -1,146 +1,53 @@
-As it says in the updated comment in gup.c: current FOLL_LONGTERM
-behavior is incompatible with FAULT_FLAG_ALLOW_RETRY because of the
-FS DAX check requirement on vmas.
+And get rid of the mmap_sem calls, as part of that. Note
+that get_user_pages_fast() will, if necessary, fall back to
+__gup_longterm_unlocked(), which takes the mmap_sem as needed.
-However, the corresponding restriction in get_user_pages_remote() was
-slightly stricter than is actually required: it forbade all
-FOLL_LONGTERM callers, but we can actually allow FOLL_LONGTERM callers
-that do not set the "locked" arg.
-
-Update the code and comments accordingly, and update the VFIO caller
-to take advantage of this, fixing a bug as a result: the VFIO caller
-is logically a FOLL_LONGTERM user.
-
-Also, remove an unnessary pair of calls that were releasing and
-reacquiring the mmap_sem. There is no need to avoid holding mmap_sem
-just in order to call page_to_pfn().
-
-Also, move the DAX check ("if a VMA is DAX, don't allow long term
-pinning") from the VFIO call site, all the way into the internals
-of get_user_pages_remote() and __gup_longterm_locked(). That is:
-get_user_pages_remote() calls __gup_longterm_locked(), which in turn
-calls check_dax_vmas(). It's lightly explained in the comments as well.
-
-Thanks to Jason Gunthorpe for pointing out a clean way to fix this,
-and to Dan Williams for helping clarify the DAX refactoring.
-
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
-Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
-Cc: Dan Williams <dan.j.williams@intel.com>
-Cc: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
- drivers/vfio/vfio_iommu_type1.c | 30 +++++-------------------------
- mm/gup.c | 27 ++++++++++++++++++++++-----
- 2 files changed, 27 insertions(+), 30 deletions(-)
+ drivers/infiniband/core/umem.c | 17 ++++++-----------
+ 1 file changed, 6 insertions(+), 11 deletions(-)
-diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
-index d864277ea16f..c7a111ad9975 100644
---- a/drivers/vfio/vfio_iommu_type1.c
-+++ b/drivers/vfio/vfio_iommu_type1.c
-@@ -340,7 +340,6 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
- {
- struct page *page[1];
- struct vm_area_struct *vma;
-- struct vm_area_struct *vmas[1];
- unsigned int flags = 0;
- int ret;
+diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
+index 7a3b99597ead..214e87aa609d 100644
+--- a/drivers/infiniband/core/umem.c
++++ b/drivers/infiniband/core/umem.c
+@@ -266,16 +266,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
+ sg = umem->sg_head.sgl;
-@@ -348,33 +347,14 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
- flags |= FOLL_WRITE;
+ while (npages) {
+- down_read(&mm->mmap_sem);
+- ret = get_user_pages(cur_base,
+- min_t(unsigned long, npages,
+- PAGE_SIZE / sizeof (struct page *)),
+- gup_flags | FOLL_LONGTERM,
+- page_list, NULL);
+- if (ret < 0) {
+- up_read(&mm->mmap_sem);
++ ret = get_user_pages_fast(cur_base,
++ min_t(unsigned long, npages,
++ PAGE_SIZE /
++ sizeof(struct page *)),
++ gup_flags | FOLL_LONGTERM, page_list);
++ if (ret < 0)
+ goto umem_release;
+- }
- down_read(&mm->mmap_sem);
-- if (mm == current->mm) {
-- ret = get_user_pages(vaddr, 1, flags | FOLL_LONGTERM, page,
-- vmas);
-- } else {
-- ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
-- vmas, NULL);
-- /*
-- * The lifetime of a vaddr_get_pfn() page pin is
-- * userspace-controlled. In the fs-dax case this could
-- * lead to indefinite stalls in filesystem operations.
-- * Disallow attempts to pin fs-dax pages via this
-- * interface.
-- */
-- if (ret > 0 && vma_is_fsdax(vmas[0])) {
-- ret = -EOPNOTSUPP;
-- put_page(page[0]);
-- }
-- }
-- up_read(&mm->mmap_sem);
+ cur_base += ret * PAGE_SIZE;
+ npages -= ret;
+@@ -283,8 +280,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
+ sg = ib_umem_add_sg_table(sg, page_list, ret,
+ dma_get_max_seg_size(context->device->dma_device),
+ &umem->sg_nents);
-
-+ ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM,
-+ page, NULL, NULL);
- if (ret == 1) {
- *pfn = page_to_pfn(page[0]);
-- return 0;
-+ ret = 0;
-+ goto done;
+- up_read(&mm->mmap_sem);
}
-- down_read(&mm->mmap_sem);
--
- vaddr = untagged_addr(vaddr);
-
- vma = find_vma_intersection(mm, vaddr, vaddr + 1);
-@@ -384,7 +364,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
- if (is_invalid_reserved_pfn(*pfn))
- ret = 0;
- }
--
-+done:
- up_read(&mm->mmap_sem);
- return ret;
- }
-diff --git a/mm/gup.c b/mm/gup.c
-index 14fcdc502166..cce2c9676853 100644
---- a/mm/gup.c
-+++ b/mm/gup.c
-@@ -29,6 +29,13 @@ struct follow_page_context {
- unsigned int page_mask;
- };
-
-+static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
-+ struct mm_struct *mm,
-+ unsigned long start,
-+ unsigned long nr_pages,
-+ struct page **pages,
-+ struct vm_area_struct **vmas,
-+ unsigned int flags);
- /*
- * Return the compound head page with ref appropriately incremented,
- * or NULL if that failed.
-@@ -1167,13 +1174,23 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
- struct vm_area_struct **vmas, int *locked)
- {
- /*
-- * FIXME: Current FOLL_LONGTERM behavior is incompatible with
-+ * Parts of FOLL_LONGTERM behavior are incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-- * vmas. As there are no users of this flag in this call we simply
-- * disallow this option for now.
-+ * vmas. However, this only comes up if locked is set, and there are
-+ * callers that do request FOLL_LONGTERM, but do not set locked. So,
-+ * allow what we can.
- */
-- if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
-- return -EINVAL;
-+ if (gup_flags & FOLL_LONGTERM) {
-+ if (WARN_ON_ONCE(locked))
-+ return -EINVAL;
-+ /*
-+ * This will check the vmas (even if our vmas arg is NULL)
-+ * and return -ENOTSUPP if DAX isn't allowed in this case:
-+ */
-+ return __gup_longterm_locked(tsk, mm, start, nr_pages, pages,
-+ vmas, gup_flags | FOLL_TOUCH |
-+ FOLL_REMOTE);
-+ }
-
- return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
- locked,
+ sg_mark_end(sg);
--
2.24.0