Thread: 6 messages, 5 authors, 2024-11-22

Re: [PATCH v2] kernel: add pid_max to pid_namespace

From: Joel Granados <joel.granados@kernel.org>
Date: 2024-11-19 09:15:47
Also in: linux-fsdevel, lkml

On Tue, Nov 05, 2024 at 11:10:24AM +0800, Yun Zhou wrote:
It is necessary to have a different pid_max in different containers.
For example, multiple containers are running on a host, one of which
is Android, and its 32 bit bionic libc only accepts pid <= 65535. So
it requires the global pid_max <= 65535. This will cause configuration
conflicts with other containers and also limit the maximum number of
tasks for the entire system.

Signed-off-by: Yun Zhou <redacted>
---
 - Remove sentinels from ctl_table arrays.
v1 - https://lore.kernel.org/all/20241030052933.1041408-1-yun.zhou@windriver.com/ (local)
---
 include/linux/pid_namespace.h     |  1 +
 kernel/pid.c                      | 12 +++++------
 kernel/pid_namespace.c            | 34 ++++++++++++++++++++++++++-----
 kernel/sysctl.c                   |  9 --------
 kernel/trace/pid_list.c           |  2 +-
 kernel/trace/trace.h              |  2 --
 kernel/trace/trace_sched_switch.c |  2 +-
 7 files changed, 38 insertions(+), 24 deletions(-)
...
quoted hunk ↗ jump to hunk
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index d70ab49d5b4a..a5a8254825d5 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -111,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	ns->user_ns = get_user_ns(user_ns);
 	ns->ucounts = ucounts;
 	ns->pid_allocated = PIDNS_ADDING;
+	ns->pid_max = parent_pid_ns->pid_max;
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
 	ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
 #endif
@@ -280,19 +281,44 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write,
 
 	return ret;
 }
+#endif	/* CONFIG_CHECKPOINT_RESTORE */
+
+static int pid_max_ns_ctl_handler(const struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
+	struct ctl_table tmp = *table;
+
+	if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns))
+		return -EPERM;
+
+	tmp.data = &pid_ns->pid_max;
+	if (pid_ns->parent)
+		tmp.extra2 = &pid_ns->parent->pid_max;
+
+	return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+}
 
-extern int pid_max;
 static struct ctl_table pid_ns_ctl_table[] = {
+#ifdef CONFIG_CHECKPOINT_RESTORE
 	{
 		.procname = "ns_last_pid",
 		.maxlen = sizeof(int),
 		.mode = 0666, /* permissions are checked in the handler */
 		.proc_handler = pid_ns_ctl_handler,
 		.extra1 = SYSCTL_ZERO,
-		.extra2 = &pid_max,
+		.extra2 = &init_pid_ns.pid_max,
 	},
-};
 #endif	/* CONFIG_CHECKPOINT_RESTORE */
+	{
+		.procname = "pid_max",
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = pid_max_ns_ctl_handler,
+		.extra1 = &pid_max_min,
+		.extra2 = &pid_max_max,
+	},
+};
I see here that the sysctl tables are now without a sentinel.
Reviewed-by: Joel Granados <joel.granados@kernel.org>


-- 

Joel Granados
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help