Thread (1 message) 1 message, 1 author, 2021-06-11

[PATCH v3 16/23] mm: Add vdso_base in mm_struct

From: Dmitry Safonov <hidden>
Date: 2021-06-11 18:03:28
Also in: lkml
Subsystem: aio, exec & binfmt api, elf, filesystems (vfs and infrastructure), memory management, memory management - core, memory mapping, rdt - resource allocation, scheduler, the rest, x86 architecture (32-bit and 64-bit) · Maintainers: Benjamin LaHaise, Kees Cook, Alexander Viro, Christian Brauner, Andrew Morton, David Hildenbrand, Liam R. Howlett, Lorenzo Stoakes, Tony Luck, Reinette Chatre, Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot, Linus Torvalds, Thomas Gleixner, Borislav Petkov, Dave Hansen

Instead of having every architecture define vdso_base/vdso_addr etc.,
provide a generic mechanism to track vdso_base for landing in userspace.
This minimizes per-architecture differences and the number of callbacks
each architecture has to provide.

Originally, this started from thread [1], where the need for a .close()
callback on vm_special_mapping was pointed out. Besides removing
duplicated .mremap() callbacks, this generic code provides a cheaper way
to support munmap() on vdso mappings without introducing .close()
callbacks for every architecture (which would bring even more code
duplication).

[1]: https://lore.kernel.org/linux-arch/CAJwJo6ZANqYkSHbQ+3b+Fi_VT80MtrzEV5yreQAWx-L8j8x2zA@mail.gmail.com/ (local)
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: linux-mips@vger.kernel.org
Signed-off-by: Dmitry Safonov <redacted>
---
 arch/Kconfig                              |  3 +++
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  3 ++-
 fs/aio.c                                  |  3 ++-
 include/linux/mm.h                        |  3 ++-
 include/linux/mm_types.h                  | 17 +++++++++++++++++
 kernel/fork.c                             |  1 +
 mm/mmap.c                                 | 19 ++++++++++++++++++-
 mm/mremap.c                               |  2 +-
 8 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index c45b770d3579..0e648a1ae412 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1235,6 +1235,9 @@ config HAVE_SPARSE_SYSCALL_NR
 config ARCH_HAS_VDSO_DATA
 	bool
 
+config ARCH_HAS_VDSO_BASE
+	bool
+
 config HAVE_STATIC_CALL
 	bool
 
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 05a89e33fde2..5ab191855816 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1458,7 +1458,8 @@ static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
+static int pseudo_lock_dev_mremap(struct vm_area_struct *old_vma,
+				  struct vm_area_struct *new_vma)
 {
 	/* Not supported */
 	return -EINVAL;
diff --git a/fs/aio.c b/fs/aio.c
index 76ce0cc3ee4e..0b27e9300d0f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -323,7 +323,8 @@ static void aio_free_ring(struct kioctx *ctx)
 	}
 }
 
-static int aio_ring_mremap(struct vm_area_struct *vma)
+static int aio_ring_mremap(struct vm_area_struct *old_vma,
+			   struct vm_area_struct *vma)
 {
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c274f75efcf9..6ef91b337f9d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -589,7 +589,8 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	/* Called any time before splitting to check if it's allowed */
 	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
-	int (*mremap)(struct vm_area_struct *area);
+	int (*mremap)(struct vm_area_struct *old_vma,
+		      struct vm_area_struct *new_vma);
 	/*
 	 * Called by mprotect() to make driver-specific permission
 	 * checks before mprotect() is finalised.   The VMA must not
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e9c5f2051f08..49ad34410132 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -497,6 +497,16 @@ struct mm_struct {
 
 		/* Architecture-specific MM context */
 		mm_context_t context;
+#ifdef CONFIG_ARCH_HAS_VDSO_BASE
+		/*
+		 * Address of special mapping VMA to land after processing
+		 * a signal. Reads are unprotected: if a thread unmaps or
+		 * mremaps the mapping while another thread is processing
+		 * a signal, it can segfault while landing.
+		 */
+		void __user *vdso_base;
+#endif
+#define UNMAPPED_VDSO_BASE TASK_SIZE_MAX
 
 		unsigned long flags; /* Must use atomic bitops to access */
 
@@ -598,6 +608,13 @@ static inline void init_tlb_flush_pending(struct mm_struct *mm)
 	atomic_set(&mm->tlb_flush_pending, 0);
 }
 
+static inline void init_vdso_base(struct mm_struct *mm)
+{
+#ifdef CONFIG_ARCH_HAS_VDSO_BASE
+	mm->vdso_base = (void __user *)UNMAPPED_VDSO_BASE;
+#endif
+}
+
 static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_inc(&mm->tlb_flush_pending);
diff --git a/kernel/fork.c b/kernel/fork.c
index dc06afd725cb..eb4ad74d042e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1082,6 +1082,7 @@ struct mm_struct *mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
+	init_vdso_base(mm);
 	return mm_init(mm, current, current_user_ns());
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 4f0d62409b1c..5d1ffce51119 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3381,11 +3381,25 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 
 static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
 
+static void update_vdso_base(struct vm_area_struct *old_vma,
+				unsigned long new_addr)
+{
+#ifdef CONFIG_ARCH_HAS_VDSO_BASE
+	struct mm_struct *mm = old_vma->vm_mm;
+
+	if (WARN_ON_ONCE(!mm))
+		return;
+	if (old_vma->vm_start == (unsigned long)mm->vdso_base)
+		mm->vdso_base = (void __user *)new_addr;
+#endif
+}
+
 /*
  * Having a close hook prevents vma merging regardless of flags.
  */
 static void special_mapping_close(struct vm_area_struct *vma)
 {
+	update_vdso_base(vma, UNMAPPED_VDSO_BASE);
 }
 
 static const char *special_mapping_name(struct vm_area_struct *vma)
@@ -3393,7 +3407,8 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
-static int special_mapping_mremap(struct vm_area_struct *new_vma)
+static int special_mapping_mremap(struct vm_area_struct *old_vma,
+				  struct vm_area_struct *new_vma)
 {
 	struct vm_special_mapping *sm = new_vma->vm_private_data;
 
@@ -3403,6 +3418,8 @@ static int special_mapping_mremap(struct vm_area_struct *new_vma)
 	if (sm->mremap)
 		sm->mremap(sm, new_vma);
 
+	update_vdso_base(old_vma, new_vma->vm_start);
+
 	return 0;
 }
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 47c255b60150..d28ba49d607c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -545,7 +545,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (moved_len < old_len) {
 		err = -ENOMEM;
 	} else if (vma->vm_ops && vma->vm_ops->mremap) {
-		err = vma->vm_ops->mremap(new_vma);
+		err = vma->vm_ops->mremap(vma, new_vma);
 	}
 
 	if (unlikely(err)) {
-- 
2.31.1
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help