Re: [PATCH RFC] mm/madvise: introduce MADV_POPULATE to prefault/prealloc memory
From: Michal Hocko <mhocko@suse.com>
Date: 2021-02-19 10:36:27
Also in:
linux-arch, linux-mips, linux-mm, lkml
On Wed 17-02-21 16:48:44, David Hildenbrand wrote: [...] I only got to the implementation now.
+static long madvise_populate(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long tmp_end;
+ int locked = 1;
+ long pages;
+
+ *prev = vma;
+
+ while (start < end) {
+ /*
+ * We might have temporarily dropped the lock. For example,
+ * our VMA might have been split.
+ */
+ if (!vma || start >= vma->vm_end) {
+ vma = find_vma(mm, start);
+ if (!vma)
+ return -ENOMEM;
+ }Why do you need to find a vma when you already have one. do_madvise will give you your vma already. I do understand that you want to finish the vma for some errors but that shouldn't require handling vmas. You should be in the shope of one here unless I miss anything.
quoted hunk ↗ jump to hunk
+ + /* Bail out on incompatible VMA types. */ + if (vma->vm_flags & (VM_IO | VM_PFNMAP) || + !vma_is_accessible(vma)) { + return -EINVAL; + } + + /* + * Populate pages and take care of VM_LOCKED: simulate user + * space access. + * + * For private, writable mappings, trigger a write fault to + * break COW (i.e., shared zeropage). For other mappings (i.e., + * read-only, shared), trigger a read fault. + */ + tmp_end = min_t(unsigned long, end, vma->vm_end); + pages = populate_vma_page_range(vma, start, tmp_end, &locked); + if (!locked) { + mmap_read_lock(mm); + *prev = NULL; + vma = NULL; + } + if (pages < 0) { + switch (pages) { + case -EINTR: + case -ENOMEM: + return pages; + case -EHWPOISON: + /* Skip over any poisoned pages. */ + start += PAGE_SIZE; + continue; + case -EBUSY: + case -EAGAIN: + continue; + default: + pr_warn_once("%s: unhandled return value: %ld\n", + __func__, pages); + return -ENOMEM; + } + } + start += pages * PAGE_SIZE; + } + return 0; +} + /* * Application wants to free up the pages and associated backing store. * This is effectively punching a hole into the middle of a file.@@ -934,6 +1001,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, case MADV_FREE: case MADV_DONTNEED: return madvise_dontneed_free(vma, prev, start, end, behavior); + case MADV_POPULATE: + return madvise_populate(vma, prev, start, end); default: return madvise_behavior(vma, prev, start, end, behavior); }@@ -954,6 +1023,7 @@ madvise_behavior_valid(int behavior) case MADV_FREE: case MADV_COLD: case MADV_PAGEOUT: + case MADV_POPULATE: #ifdef CONFIG_KSM case MADV_MERGEABLE: case MADV_UNMERGEABLE:-- 2.29.2
-- Michal Hocko SUSE Labs