Thread (38 messages) 38 messages, 8 authors, 2021-02-25

Re: [PATCH RFC] mm/madvise: introduce MADV_POPULATE to prefault/prealloc memory

From: Michal Hocko <mhocko@suse.com>
Date: 2021-02-19 10:36:27
Also in: linux-arch, linux-mips, linux-mm, lkml

On Wed 17-02-21 16:48:44, David Hildenbrand wrote:
[...]

I only got  to the implementation now.
+static long madvise_populate(struct vm_area_struct *vma,
+			     struct vm_area_struct **prev,
+			     unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long tmp_end;
+	int locked = 1;
+	long pages;
+
+	*prev = vma;
+
+	while (start < end) {
+		/*
+		 * We might have temporarily dropped the lock. For example,
+		 * our VMA might have been split.
+		 */
+		if (!vma || start >= vma->vm_end) {
+			vma = find_vma(mm, start);
+			if (!vma)
+				return -ENOMEM;
+		}
Why do you need to find a vma when you already have one. do_madvise will
give you your vma already. I do understand that you want to finish the
vma for some errors but that shouldn't require handling vmas. You should
be in the shope of one here unless I miss anything.
quoted hunk ↗ jump to hunk
+
+		/* Bail out on incompatible VMA types. */
+		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
+		    !vma_is_accessible(vma)) {
+			return -EINVAL;
+		}
+
+		/*
+		 * Populate pages and take care of VM_LOCKED: simulate user
+		 * space access.
+		 *
+		 * For private, writable mappings, trigger a write fault to
+		 * break COW (i.e., shared zeropage). For other mappings (i.e.,
+		 * read-only, shared), trigger a read fault.
+		 */
+		tmp_end = min_t(unsigned long, end, vma->vm_end);
+		pages = populate_vma_page_range(vma, start, tmp_end, &locked);
+		if (!locked) {
+			mmap_read_lock(mm);
+			*prev = NULL;
+			vma = NULL;
+		}
+		if (pages < 0) {
+			switch (pages) {
+			case -EINTR:
+			case -ENOMEM:
+				return pages;
+			case -EHWPOISON:
+				/* Skip over any poisoned pages. */
+				start += PAGE_SIZE;
+				continue;
+			case -EBUSY:
+			case -EAGAIN:
+				continue;
+			default:
+				pr_warn_once("%s: unhandled return value: %ld\n",
+					     __func__, pages);
+				return -ENOMEM;
+			}
+		}
+		start += pages * PAGE_SIZE;
+	}
+	return 0;
+}
+
 /*
  * Application wants to free up the pages and associated backing store.
  * This is effectively punching a hole into the middle of a file.
@@ -934,6 +1001,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	case MADV_FREE:
 	case MADV_DONTNEED:
 		return madvise_dontneed_free(vma, prev, start, end, behavior);
+	case MADV_POPULATE:
+		return madvise_populate(vma, prev, start, end);
 	default:
 		return madvise_behavior(vma, prev, start, end, behavior);
 	}
@@ -954,6 +1023,7 @@ madvise_behavior_valid(int behavior)
 	case MADV_FREE:
 	case MADV_COLD:
 	case MADV_PAGEOUT:
+	case MADV_POPULATE:
 #ifdef CONFIG_KSM
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
-- 
2.29.2
-- 
Michal Hocko
SUSE Labs
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help