Re: [PATCH v2] kernel: add pid_max to pid_namespace
From: Joel Granados <joel.granados@kernel.org>
Date: 2024-11-19 09:15:47
Also in:
linux-fsdevel, lkml
On Tue, Nov 05, 2024 at 11:10:24AM +0800, Yun Zhou wrote:
It is necessary to have a different pid_max in different containers. For example, when multiple containers are running on a host, one of which is Android, its 32-bit bionic libc only accepts pid <= 65535, so it requires the global pid_max <= 65535. This will cause configuration conflicts with other containers and also limit the maximum number of tasks for the entire system.

Signed-off-by: Yun Zhou <redacted>
---
- Remove sentinels from ctl_table arrays.
v1 - https://lore.kernel.org/all/20241030052933.1041408-1-yun.zhou@windriver.com/ (local)
---
 include/linux/pid_namespace.h     |  1 +
 kernel/pid.c                      | 12 +++++------
 kernel/pid_namespace.c            | 34 ++++++++++++++++++++++++++-----
 kernel/sysctl.c                   |  9 --------
 kernel/trace/pid_list.c           |  2 +-
 kernel/trace/trace.h              |  2 --
 kernel/trace/trace_sched_switch.c |  2 +-
 7 files changed, 38 insertions(+), 24 deletions(-)
...
quoted hunk ↗ jump to hunk
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index d70ab49d5b4a..a5a8254825d5 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c@@ -111,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; ns->pid_allocated = PIDNS_ADDING; + ns->pid_max = parent_pid_ns->pid_max; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns); #endif@@ -280,19 +281,44 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write, return ret; } +#endif /* CONFIG_CHECKPOINT_RESTORE */ + +static int pid_max_ns_ctl_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct pid_namespace *pid_ns = task_active_pid_ns(current); + struct ctl_table tmp = *table; + + if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns)) + return -EPERM; + + tmp.data = &pid_ns->pid_max; + if (pid_ns->parent) + tmp.extra2 = &pid_ns->parent->pid_max; + + return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); +} -extern int pid_max; static struct ctl_table pid_ns_ctl_table[] = { +#ifdef CONFIG_CHECKPOINT_RESTORE { .procname = "ns_last_pid", .maxlen = sizeof(int), .mode = 0666, /* permissions are checked in the handler */ .proc_handler = pid_ns_ctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &pid_max, + .extra2 = &init_pid_ns.pid_max, }, -}; #endif /* CONFIG_CHECKPOINT_RESTORE */ + { + .procname = "pid_max", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = pid_max_ns_ctl_handler, + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, + }, +};
I see here that the sysctl tables are now defined without a sentinel entry.

Reviewed-by: Joel Granados <joel.granados@kernel.org>

--
Joel Granados