Thread: 16 messages, 5 authors, 2024-09-19

Re: [PATCH v3 7/8] execmem: add support for cache of large ROX pages

From: Ard Biesheuvel <ardb@kernel.org>
Date: 2024-09-13 15:00:56
Also in: bpf, linux-alpha, linux-arch, linux-m68k, linux-mips, linux-mm, linux-modules, linux-riscv, linux-sh, linux-um, linuxppc-dev, lkml, loongarch, sparclinux

Hi Mike,

On Mon, 9 Sept 2024 at 08:51, Mike Rapoport [off-list ref] wrote:
quoted hunk ↗ jump to hunk
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>

Using large pages to map text areas reduces iTLB pressure and improves
performance.

Extend execmem_alloc() with an ability to use huge pages with ROX
permissions as a cache for smaller allocations.

To populate the cache, a writable large page is allocated from vmalloc with
VM_ALLOW_HUGE_VMAP, filled with invalid instructions and then remapped as
ROX.

Portions of that large page are handed out to execmem_alloc() callers
without any changes to the permissions.

When the memory is freed with execmem_free() it is invalidated again so
that it won't contain stale instructions.

The cache is enabled when an architecture sets EXECMEM_ROX_CACHE flag in
definition of an execmem_range.

Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 include/linux/execmem.h |   2 +
 mm/execmem.c            | 289 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 286 insertions(+), 5 deletions(-)
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index dfdf19f8a5e8..7436aa547818 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -77,12 +77,14 @@ struct execmem_range {

 /**
  * struct execmem_info - architecture parameters for code allocations
+ * @fill_trapping_insns: set memory to contain instructions that will trap
  * @ranges: array of parameter sets defining architecture specific
  * parameters for executable memory allocations. The ranges that are not
  * explicitly initialized by an architecture use parameters defined for
  * @EXECMEM_DEFAULT.
  */
 struct execmem_info {
+       void (*fill_trapping_insns)(void *ptr, size_t size, bool writable);
        struct execmem_range    ranges[EXECMEM_TYPE_MAX];
 };
diff --git a/mm/execmem.c b/mm/execmem.c
index 0f6691e9ffe6..f547c1f3c93d 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -7,28 +7,88 @@
  */

 #include <linux/mm.h>
+#include <linux/mutex.h>
 #include <linux/vmalloc.h>
 #include <linux/execmem.h>
+#include <linux/maple_tree.h>
 #include <linux/moduleloader.h>
 #include <linux/text-patching.h>

+#include <asm/tlbflush.h>
+
+#include "internal.h"
+
 static struct execmem_info *execmem_info __ro_after_init;
 static struct execmem_info default_execmem_info __ro_after_init;

-static void *__execmem_alloc(struct execmem_range *range, size_t size)
+#ifdef CONFIG_MMU
+struct execmem_cache {
+       struct mutex mutex;
+       struct maple_tree busy_areas;
+       struct maple_tree free_areas;
+};
+
+static struct execmem_cache execmem_cache = {
+       .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex),
+       .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN,
+                                    execmem_cache.mutex),
+       .free_areas = MTREE_INIT_EXT(free_areas, MT_FLAGS_LOCK_EXTERN,
+                                    execmem_cache.mutex),
+};
+
+static void execmem_cache_clean(struct work_struct *work)
+{
+       struct maple_tree *free_areas = &execmem_cache.free_areas;
+       struct mutex *mutex = &execmem_cache.mutex;
+       MA_STATE(mas, free_areas, 0, ULONG_MAX);
+       void *area;
+
+       mutex_lock(mutex);
+       mas_for_each(&mas, area, ULONG_MAX) {
+               size_t size;
+
+               if (!xa_is_value(area))
+                       continue;
+
+               size = xa_to_value(area);
+
+               if (IS_ALIGNED(size, PMD_SIZE) &&
+                   IS_ALIGNED(mas.index, PMD_SIZE)) {
+                       void *ptr = (void *)mas.index;
+
+                       mas_erase(&mas);
+                       vfree(ptr);
+               }
+       }
+       mutex_unlock(mutex);
+}
+
+static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean);
+
+static void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable)
+{
+       if (execmem_info->fill_trapping_insns)
+               execmem_info->fill_trapping_insns(ptr, size, writable);
+       else
+               memset(ptr, 0, size);
Does this really have to be a function pointer with a runtime check?

This could just be a __weak definition, with the arch providing an
override if the memset() is not appropriate.
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help