Thread (7 messages) 7 messages, 2 authors, 2025-08-20
STALE307d

[PATCH -next 2/2] ftrace: Fix potential use-after-free for set_ftrace_{notrace,filter} files

From: Tengda Wu <hidden>
Date: 2025-08-13 02:31:08
Also in: lkml
Subsystem: function hooks (ftrace), the rest, tracing · Maintainers: Steven Rostedt, Masami Hiramatsu, Linus Torvalds

Concurrent read/write operations on the set_ftrace_{notrace,filter}
files may intermittently trigger the following crash:

[ 2715.745293] BUG: unable to handle page fault for address: 00000003da393970
[ 2715.753736] CPU: 1 UID: 0 PID: 1324 Comm: read Not tainted 6.16.0-next-20250808 #1 PREEMPT(full)
[ 2715.755292] RIP: 0010:ftrace_lookup_ip+0x40/0x70
[ 2715.761114] Call Trace:
[ 2715.761462]  <TASK>
[ 2715.761705]  t_func_next.isra.0+0xaa/0xd0
[ 2715.762049]  t_start+0xa3/0x140
[ 2715.762207]  seq_read_iter+0xe8/0x4a0
[ 2715.762564]  seq_read+0x101/0x140
[ 2715.762769]  vfs_read+0xbd/0x340
[ 2715.763014]  ? preempt_count_add+0x4b/0xa0
[ 2715.763311]  ? do_sys_openat2+0x8c/0xd0
[ 2715.763623]  ksys_read+0x65/0xe0
[ 2715.763797]  do_syscall_64+0x4e/0x1c0
[ 2715.764049]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

The issue can be reproduced with the following script (using the
set_ftrace_notrace file as an example):

  while true; do
    echo __probestub_initcall_level > /sys/kernel/tracing/set_ftrace_notrace &
    cat /sys/kernel/tracing/set_ftrace_notrace &
  done

The root cause is that ftrace_regex_open() and ftrace_regex_release()
do not synchronize concurrent access to notrace_hash.
Consider a race scenario between a reader and a writer:

1. The reader first obtains the value of notrace_hash via
   ftrace_regex_open().
2. The writer then updates notrace_hash via ftrace_regex_release()
   and frees the memory pointed to by the old notrace_hash.
3. Later, the reader accesses the old notrace_hash memory in
   ftrace_hash_empty() and ftrace_lookup_ip(), leading to a UAF.

CPU 1 (read)                                    CPU 2 (write)
ftrace_regex_open
  hash = ops->func_hash->notrace_hash;
  iter->hash = hash;
                                                ftrace_regex_open
                                                ftrace_regex_release
                                                  orig_hash = &iter->ops->func_hash->notrace_hash;
                                                  old_hash = *orig_hash;
                                                  free_ftrace_hash_rcu(old_hash);
t_start
  ftrace_hash_empty(iter->hash)
  t_func_next
    !ftrace_lookup_ip(iter->hash, rec->ip)

Since the reader's hash is always tied to its file descriptor (fd),
the writer cannot directly manage the reader's hash. To fix this,
introduce a refcount for ftrace_hash, initialized to 1. The count
is incremented only when a reader opens the file and takes a reference
to the hash, and decremented when either a reader or a writer releases
it, so that the ftrace_hash is freed only after its last user has
dropped its reference.

Fixes: c20489dad156 ("ftrace: Assign iter->hash to filter or notrace hashes on seq read")
Signed-off-by: Tengda Wu <redacted>
---
 kernel/trace/ftrace.c | 27 ++++++++++++++++++++++++---
 kernel/trace/trace.h  |  2 ++
 2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cade13595b08..be4842054254 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1060,6 +1060,7 @@ struct ftrace_func_probe {
 static const struct hlist_head empty_buckets[1];
 static const struct ftrace_hash empty_hash = {
 	.buckets = (struct hlist_head *)empty_buckets,
+	.refcount = REFCOUNT_INIT(1),
 };
 #define EMPTY_HASH	((struct ftrace_hash *)&empty_hash)
 
@@ -1282,6 +1283,22 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
 	call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
 }
 
+static void get_ftrace_hash(struct ftrace_hash *hash)
+{
+	if (!hash || hash == EMPTY_HASH)
+		return;
+	if (!refcount_inc_not_zero(&hash->refcount))
+		WARN_ON(1);
+}
+
+static void put_ftrace_hash_rcu(struct ftrace_hash *hash)
+{
+	if (!hash || hash == EMPTY_HASH)
+		return;
+	if (refcount_dec_and_test(&hash->refcount))
+		call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
+}
+
 /**
  * ftrace_free_filter - remove all filters for an ftrace_ops
  * @ops: the ops to remove the filters from
@@ -1316,6 +1333,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
 	}
 
 	hash->size_bits = size_bits;
+	refcount_set(&hash->refcount, 1);
 
 	return hash;
 }
@@ -3362,7 +3380,7 @@ static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
 	ret = ftrace_hash_move(ops, enable, orig_hash, hash);
 	if (!ret) {
 		ftrace_ops_update_code(ops, &old_hash_ops);
-		free_ftrace_hash_rcu(old_hash);
+		put_ftrace_hash_rcu(old_hash);
 	}
 	return ret;
 }
@@ -3714,7 +3732,7 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
 		*orig_subhash = save_hash;
 		free_ftrace_hash_rcu(new_hash);
 	} else {
-		free_ftrace_hash_rcu(save_hash);
+		put_ftrace_hash_rcu(save_hash);
 	}
 	return ret;
 }
@@ -4666,8 +4684,10 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 			trace_parser_put(&iter->parser);
 			goto out_unlock;
 		}
-	} else
+	} else {
 		iter->hash = hash;
+		get_ftrace_hash(iter->hash);
+	}
 
 	ret = 0;
 
@@ -6544,6 +6564,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 		mutex_unlock(&ftrace_lock);
 	} else {
 		/* For read only, the hash is the ops hash */
+		put_ftrace_hash_rcu(iter->hash);
 		iter->hash = NULL;
 	}
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1dbf1d3cf2f1..4936cd218c36 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,6 +22,7 @@
 #include <linux/ctype.h>
 #include <linux/once_lite.h>
 #include <linux/ftrace_regs.h>
+#include <linux/refcount.h>
 
 #include "pid_list.h"
 
@@ -905,6 +906,7 @@ struct ftrace_hash {
 	unsigned long		count;
 	unsigned long		flags;
 	struct rcu_head		rcu;
+	refcount_t		refcount;
 };
 
 struct ftrace_func_entry *
-- 
2.34.1
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help