Thread (2 messages) 2 messages, 2 authors, 2024-08-08

Re: [PATCH RFC 4/4] mm: guest_memfd: Add ability for mmap'ing pages

From: Ackerley Tng <hidden>
Date: 2024-08-08 18:51:16
Also in: kvm, linux-arm-msm, linux-mm, lkml

Possibly related (same subject, not in this thread)

Elliot Berman [off-list ref] writes:
Confidential/protected guest virtual machines want to share some memory
back with the host Linux. For example, virtqueues allow host and
protected guest to exchange data. In MMU-only isolation of protected
guest virtual machines, the transition between "shared" and "private"
can be done in-place without a trusted hypervisor copying pages.

Add support for this feature and allow Linux to mmap host-accessible
pages. When the owner provides an ->accessible() callback in the
struct guest_memfd_operations, guest_memfd allows folios to be mapped
when the ->accessible() callback returns 0.

To safely make inaccessible:
folio = guest_memfd_grab_folio(inode, index, flags);
r = guest_memfd_make_inaccessible(inode, folio);
if (r)
        goto err;

hypervisor_does_guest_mapping(folio);

folio_unlock(folio);
hypervisor_does_s2_mapping(folio) should make it so
ops->accessible(...) on those folios fails.

The folio lock ensures atomicity.
I am also working on determining faultability not based on the
private-ness of the page but based on permission given by the
guest. I'd like to learn from what you've discovered here.

Could you please elaborate on this? What races is the folio_lock
intended to prevent, what operations are we ensuring atomicity of?

Is this why you did a guest_memfd_grab_folio() before checking
->accessible(), and then doing folio_unlock() if the page is
inaccessible?
quoted hunk
Signed-off-by: Elliot Berman <redacted>
---
 include/linux/guest_memfd.h |  7 ++++
 mm/guest_memfd.c            | 81 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/include/linux/guest_memfd.h b/include/linux/guest_memfd.h
index f9e4a27aed67..edcb4ba60cb0 100644
--- a/include/linux/guest_memfd.h
+++ b/include/linux/guest_memfd.h
@@ -16,12 +16,18 @@
  * @invalidate_end: called after invalidate_begin returns success. Optional.
  * @prepare: called before a folio is mapped into the guest address space.
  *           Optional.
+ * @accessible: called after prepare returns success and before it's mapped
+ *              into the guest address space. Returns 0 if the folio can be
+ *              accessed.
+ *              Optional. If not present, assumes folios are never accessible.
  * @release: Called when releasing the guest_memfd file. Required.
  */
 struct guest_memfd_operations {
 	int (*invalidate_begin)(struct inode *inode, pgoff_t offset, unsigned long nr);
 	void (*invalidate_end)(struct inode *inode, pgoff_t offset, unsigned long nr);
 	int (*prepare)(struct inode *inode, pgoff_t offset, struct folio *folio);
+	int (*accessible)(struct inode *inode, struct folio *folio,
+			  pgoff_t offset, unsigned long nr);
 	int (*release)(struct inode *inode);
 };
 
@@ -48,5 +54,6 @@ struct file *guest_memfd_alloc(const char *name,
 			       const struct guest_memfd_operations *ops,
 			       loff_t size, unsigned long flags);
 bool is_guest_memfd(struct file *file, const struct guest_memfd_operations *ops);
+int guest_memfd_make_inaccessible(struct file *file, struct folio *folio);
 
 #endif
diff --git a/mm/guest_memfd.c b/mm/guest_memfd.c
index e9d8cab72b28..6b5609932ca5 100644
--- a/mm/guest_memfd.c
+++ b/mm/guest_memfd.c
@@ -9,6 +9,8 @@
 #include <linux/pagemap.h>
 #include <linux/set_memory.h>
 
+#include "internal.h"
+
 static inline int guest_memfd_folio_private(struct folio *folio)
 {
 	unsigned long nr_pages = folio_nr_pages(folio);
@@ -89,7 +91,7 @@ struct folio *guest_memfd_grab_folio(struct file *file, pgoff_t index, u32 flags
 			goto out_err;
 	}
 
-	if (gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP) {
+	if (!ops->accessible && (gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)) {
 		r = guest_memfd_folio_private(folio);
 		if (r)
 			goto out_err;
@@ -107,6 +109,82 @@ struct folio *guest_memfd_grab_folio(struct file *file, pgoff_t index, u32 flags
 }
 EXPORT_SYMBOL_GPL(guest_memfd_grab_folio);
 
+int guest_memfd_make_inaccessible(struct file *file, struct folio *folio)
+{
+	unsigned long gmem_flags = (unsigned long)file->private_data;
+	unsigned long i;
+	int r;
+
+	unmap_mapping_folio(folio);
+
+	/**
+	 * We can't use the refcount. It might be elevated due to
+	 * guest/vcpu trying to access same folio as another vcpu
+	 * or because userspace is trying to access folio for same reason
+	 *
+	 * folio_lock serializes the transitions between (in)accessible
+	 */
+	if (folio_maybe_dma_pinned(folio))
+		return -EBUSY;
+
+	if (gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP) {
+		r = guest_memfd_folio_private(folio);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static vm_fault_t gmem_fault(struct vm_fault *vmf)
+{
+	struct file *file = vmf->vma->vm_file;
+	struct inode *inode = file_inode(file);
+	const struct guest_memfd_operations *ops = inode->i_private;
+	struct folio *folio;
+	pgoff_t off;
+	int r;
+
+	folio = guest_memfd_grab_folio(file, vmf->pgoff, GUEST_MEMFD_GRAB_UPTODATE);
Could grabbing the folio with GUEST_MEMFD_GRAB_UPTODATE cause unintended
zeroing of the page if the page turns out to be inaccessible?
quoted hunk
+	if (!folio)
+		return VM_FAULT_SIGBUS;
+
+	off = vmf->pgoff & (folio_nr_pages(folio) - 1);
+	r = ops->accessible(inode, folio, off, 1);
+	if (r) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return VM_FAULT_SIGBUS;
+	}
+
+	guest_memfd_folio_clear_private(folio);
+
+	vmf->page = folio_page(folio, off);
+
+	return VM_FAULT_LOCKED;
+}
+
+static const struct vm_operations_struct gmem_vm_ops = {
+	.fault = gmem_fault,
+};
+
+static int gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	const struct guest_memfd_operations *ops = file_inode(file)->i_private;
+
+	if (!ops->accessible)
+		return -EPERM;
+
+	/* No support for private mappings to avoid COW.  */
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE))
+		return -EINVAL;
+
+	file_accessed(file);
+	vma->vm_ops = &gmem_vm_ops;
+	return 0;
+}
+
 static long gmem_punch_hole(struct file *file, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
@@ -220,6 +298,7 @@ static int gmem_release(struct inode *inode, struct file *file)
 static struct file_operations gmem_fops = {
 	.open = generic_file_open,
 	.llseek = generic_file_llseek,
+	.mmap = gmem_mmap,
 	.release = gmem_release,
 	.fallocate = gmem_fallocate,
 	.owner = THIS_MODULE,
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help