Re: [PATCH v3 7/9] userfaultfd: add UFFDIO_CONTINUE ioctl
From: Peter Xu <peterx@redhat.com>
Date: 2021-02-01 19:22:57
Also in:
linux-fsdevel, lkml
On Thu, Jan 28, 2021 at 02:48:17PM -0800, Axel Rasmussen wrote:
quoted hunk ↗ jump to hunk
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f94a35296618..79e1f0155afa 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h@@ -135,11 +135,14 @@ void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); +#ifdef CONFIG_USERFAULTFD
I'm confused why this is needed.. hugetlb_mcopy_atomic_pte() should only be
called in userfaultfd.c, but if without uffd config set it won't compile
either:
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
quoted hunk ↗ jump to hunk
int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + enum mcopy_atomic_mode mode, struct page **pagep); +#endif int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags);diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index fb9abaeb4194..2fcb686211e8 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h@@ -37,6 +37,22 @@ extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); +/* + * The mode of operation for __mcopy_atomic and its helpers. + * + * This is almost an implementation detail (mcopy_atomic below doesn't take this + * as a parameter), but it's exposed here because memory-kind-specific + * implementations (e.g. hugetlbfs) need to know the mode of operation. + */ +enum mcopy_atomic_mode { + /* A normal copy_from_user into the destination range. */ + MCOPY_ATOMIC_NORMAL, + /* Don't copy; map the destination range to the zero page. */ + MCOPY_ATOMIC_ZEROPAGE, + /* Just setup the dst_vma, without modifying the underlying page(s). */ + MCOPY_ATOMIC_CONTINUE, +}; +
Maybe better to keep this to where it's used, e.g. hugetlb.h where we've defined hugetlb_mcopy_atomic_pte()? [...]
quoted hunk ↗ jump to hunk
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6f9d8349f818..3d318ef3d180 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c@@ -4647,6 +4647,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } +#ifdef CONFIG_USERFAULTFD
So I feel like you added the header ifdef for this. IMHO we can drop both since that's what we have had. I agree maybe it's better to not compile that without CONFIG_USERFAULTFD but that may worth a standalone patch anyways.
quoted hunk ↗ jump to hunk
/* * Used by userfaultfd UFFDIO_COPY. Based on mcopy_atomic_pte with * modifications for huge pages.@@ -4656,6 +4657,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + enum mcopy_atomic_mode mode, struct page **pagep) { struct address_space *mapping;@@ -4668,7 +4670,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, int ret; struct page *page; - if (!*pagep) { + mapping = dst_vma->vm_file->f_mapping; + idx = vma_hugecache_offset(h, dst_vma, dst_addr); + + if (!*pagep && mode != MCOPY_ATOMIC_CONTINUE) { ret = -ENOMEM; page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page))@@ -4685,6 +4690,12 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, /* don't free the page */ goto out; } + } else if (mode == MCOPY_ATOMIC_CONTINUE) { + ret = -EFAULT; + page = find_lock_page(mapping, idx); + *pagep = NULL; + if (!page) + goto out; } else { page = *pagep; *pagep = NULL;
I would write this as:
if (mode == MCOPY_ATOMIC_CONTINUE)
...
else if (!*pagep)
...
else
...
No strong opinion, but that'll look slightly cleaner to me.
[...]
quoted hunk ↗ jump to hunk
@@ -408,7 +407,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, - bool zeropage); + enum mcopy_atomic_mode mode); #endif /* CONFIG_HUGETLB_PAGE */ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,@@ -417,7 +416,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, unsigned long dst_addr, unsigned long src_addr, struct page **page, - bool zeropage, + enum mcopy_atomic_mode mode, bool wp_copy) { ssize_t err;@@ -433,22 +432,38 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, * and not in the radix tree. */ if (!(dst_vma->vm_flags & VM_SHARED)) { - if (!zeropage) + switch (mode) { + case MCOPY_ATOMIC_NORMAL: err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, src_addr, page, wp_copy); - else + break; + case MCOPY_ATOMIC_ZEROPAGE: err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, dst_addr); + break; + /* It only makes sense to CONTINUE for shared memory. */ + case MCOPY_ATOMIC_CONTINUE: + err = -EINVAL; + break; + } } else { VM_WARN_ON_ONCE(wp_copy); - if (!zeropage) + switch (mode) { + case MCOPY_ATOMIC_NORMAL: err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, src_addr, page); - else + break; + case MCOPY_ATOMIC_ZEROPAGE: err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, dst_addr); + break; + case MCOPY_ATOMIC_CONTINUE: + /* FIXME: Add minor fault interception for shmem. */ + err = -EINVAL; + break; + } } return err;
The whole chunk above is not needed for hugetlbfs it seems - I'd avoid touching
the anon/shmem code path until it's being supported.
What you need is probably set zeropage as below in __mcopy_atomic():
zeropage = (mode == MCOPY_ATOMIC_ZEROPAGE);
Before passing it over to mfill_atomic_pte(). As long as we reject
UFFDIO_CONTINUE with !hugetlbfs correctly that'll be enough iiuc.
Thanks,
--
Peter Xu