Re: [PATCH v1 2/2] signal: add procfd_signal() syscall
From: Christian Brauner <christian@brauner.io>
Date: 2018-11-20 04:53:58
Also in:
linux-fsdevel, linux-man, lkml
On Mon, Nov 19, 2018 at 07:59:24AM -0800, Daniel Colascione wrote:
On Mon, Nov 19, 2018 at 2:32 AM, Christian Brauner [off-list ref] wrote:quoted
The kill() syscall operates on process identifiers. After a process has exited its pid can be reused by another process. If a caller sends a signal to a reused pid it will end up signaling the wrong process. This issue has often surfaced and there has been a push [1] to address this problem. A prior patch has introduced the ability to get a file descriptor referencing struct pid by opening /proc/<pid>. This guarantees a stable handle on a process which can be used to send signals to the referenced process. Discussion has shown that a dedicated syscall is preferable over ioctl()s. Thus, the new syscall procfd_signal() is introduced to solve this problem. It operates on a process file descriptor. The syscall takes an additional siginfo_t and flags argument. If siginfo_t is NULL then procfd_signal() behaves like kill() if it is not NULL it behaves like rt_sigqueueinfo. The flags argument is added to allow for future extensions of this syscall. It currently needs to be passed as 0. With this patch a process can be killed via: #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <signal.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/types.h> #include <unistd.h> int main(int argc, char *argv[]) { int ret; char buf[1000]; if (argc < 2) exit(EXIT_FAILURE); ret = snprintf(buf, sizeof(buf), "/proc/%s", argv[1]); if (ret < 0) exit(EXIT_FAILURE); int fd = open(buf, O_DIRECTORY | O_CLOEXEC); if (fd < 0) { printf("%s - Failed to open \"%s\"\n", strerror(errno), buf); exit(EXIT_FAILURE); } ret = syscall(__NR_procfd_signal, fd, SIGKILL, NULL, 0); if (ret < 0) { printf("Failed to send SIGKILL \"%s\"\n", strerror(errno)); close(fd); exit(EXIT_FAILURE); } close(fd); exit(EXIT_SUCCESS); } [1]: https://lkml.org/lkml/2018/11/18/130 Cc: "Eric W. Biederman" <redacted> Cc: Serge Hallyn <serge@hallyn.com> Cc: Jann Horn <jannh@google.com> Cc: Kees Cook <redacted> Cc: Andy Lutomirsky <luto@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Aleksa Sarai <redacted> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Christian Brauner <christian@brauner.io> --- Changelog: v1: - patch introduced --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + fs/proc/base.c | 6 ++ include/linux/proc_fs.h | 1 + include/linux/syscalls.h | 2 + kernel/signal.c | 76 ++++++++++++++++++++++++-- 6 files changed, 81 insertions(+), 6 deletions(-)diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3cf7b533b3d1..e637eab883e9 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl@@ -398,3 +398,4 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq +387 i386 procfd_signal sys_procfd_signal __ia32_sys_procfd_signaldiff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f0b1709a5ffb..e95f6741ab42 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl@@ -343,6 +343,7 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common procfd_signal __x64_sys_procfd_signal # # x32-specific system call numbers start at 512 to avoid cache impactdiff --git a/fs/proc/base.c b/fs/proc/base.c index 6365a4fea314..a12c9de92bd0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c@@ -3055,6 +3055,12 @@ static const struct file_operations proc_tgid_base_operations = { .release = proc_tgid_release, }; +bool proc_is_procfd(const struct file *file) +{ + return d_is_dir(file->f_path.dentry) && + (file->f_op == &proc_tgid_base_operations); +} + static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry,diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d0e1f1522a78..2d53a47fba34 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h@@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo int (*show)(struct seq_file *, void *), proc_write_t write, void *data); +extern bool proc_is_procfd(const struct file *file); #else /* CONFIG_PROC_FS */diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2ac3d13a915b..a5ca8cb84566 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h@@ -907,6 +907,8 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); +asmlinkage long sys_procfd_signal(int fd, int sig, siginfo_t __user *info, + int flags); /* * Architecture-specific system callsdiff --git a/kernel/signal.c b/kernel/signal.c index 9a32bc2088c9..e8a8929de710 100644 --- a/kernel/signal.c +++ b/kernel/signal.c@@ -19,7 +19,9 @@ #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> +#include <linux/file.h> #include <linux/fs.h> +#include <linux/proc_fs.h> #include <linux/tty.h> #include <linux/binfmts.h> #include <linux/coredump.h>@@ -3286,6 +3288,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, } #endif +static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info) +{ + clear_siginfo(info); + info->si_signo = sig; + info->si_errno = 0; + info->si_code = SI_USER; + info->si_pid = task_tgid_vnr(current); + info->si_uid = from_kuid_munged(current_user_ns(), current_uid()); +} + /** * sys_kill - send a signal to a process * @pid: the PID of the process@@ -3295,16 +3307,68 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct kernel_siginfo info; - clear_siginfo(&info); - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_USER; - info.si_pid = task_tgid_vnr(current); - info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); + prepare_kill_siginfo(sig, &info); return kill_something_info(sig, &info, pid); } +/** + * sys_procfd_signal - send a signal to a process through a process file + * descriptor + * @fd: the file descriptor of the process + * @sig: signal to be sent + * @info: the signal info + * @flags: future flags to be passed + */ +SYSCALL_DEFINE4(procfd_signal, int, fd, int, sig, siginfo_t __user *, info, + int, flags) +{ + int ret; + struct pid *pid; + kernel_siginfo_t kinfo; + struct fd f; + + /* Enforce flags be set to 0 until we add an extension. */ + if (flags) + return -EINVAL; + + f = fdget_raw(fd); + if (!f.file) + return -EBADF; + + ret = -EINVAL; + /* Is this a process file descriptor? */ + if (!proc_is_procfd(f.file)) + goto err; + + pid = f.file->private_data;You never addressed my comment on the previous patch about your use of
Sorry, that thread exploded so quickly that I might have missed it.
private_data here. Why can't you use the struct pid reference that's already in the inode?
If that's what people prefer we can probably use that. There was precedent for stashing away such data in fs/proc/base.c already for various other things including user namespaces and struct mm so I followed this model. A prior version of my patch (I didn't send out) did retrive the inode via proc_pid() in .open() took an additional reference via get_pid() and dropped it in .release(). Do we prefer that?