Thread (10 messages) 10 messages, 3 authors, 2021-05-18

Re: [PATCH 1/1] cgroup: make per-cgroup pressure stall tracking configurable

From: Suren Baghdasaryan <surenb@google.com>
Date: 2021-05-16 19:52:51
Also in: cgroups, lkml

On Fri, May 14, 2021 at 11:50 AM Suren Baghdasaryan [off-list ref] wrote:
On Fri, May 14, 2021 at 11:20 AM Suren Baghdasaryan [off-list ref] wrote:
quoted
On Fri, May 14, 2021 at 10:52 AM Peter Zijlstra [off-list ref] wrote:
quoted
On Fri, May 14, 2021 at 08:54:47AM -0700, Suren Baghdasaryan wrote:
quoted
Correct, for this function CONFIG_CGROUPS=n and
cgroup_disable=pressure are treated the same. True, from the code it's
not very obvious. Do you have some refactoring in mind that would make
it more explicit?
Does this make sense?
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -744,24 +744,26 @@ static void psi_group_change(struct psi_

 static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
 {
+       if (cgroup_psi_enabled()) {
 #ifdef CONFIG_CGROUPS
-       struct cgroup *cgroup = NULL;
+               struct cgroup *cgroup = NULL;

-       if (!*iter)
-               cgroup = task->cgroups->dfl_cgrp;
-       else if (*iter == &psi_system)
-               return NULL;
-       else
-               cgroup = cgroup_parent(*iter);
+               if (!*iter)
+                       cgroup = task->cgroups->dfl_cgrp;
+               else if (*iter == &psi_system)
+                       return NULL;
+               else
+                       cgroup = cgroup_parent(*iter);

-       if (cgroup && cgroup_parent(cgroup)) {
-               *iter = cgroup;
-               return cgroup_psi(cgroup);
-       }
-#else
-       if (*iter)
-               return NULL;
+               if (cgroup && cgroup_parent(cgroup)) {
+                       *iter = cgroup;
+                       return cgroup_psi(cgroup);
+               }
 #endif
+       } else {
+               if (*iter)
+                       return NULL;
+       }
        *iter = &psi_system;
        return &psi_system;
 }
Hmm. Looks like the case when cgroup_psi_enabled()==true and
CONFIG_CGROUPS=n would miss the "if (*iter) return NULL;" condition.
Effectively with CONFIG_CGROUPS=n this becomes:

       if (cgroup_psi_enabled()) {           <== assume this is true
#ifdef CONFIG_CGROUPS                <== compiled out
#endif
       } else {
               if (*iter)                                  <== this
statement will never execute
                       return NULL;
       }
       *iter = &psi_system;
        return &psi_system;
Ah, sorry. I forgot that CONFIG_CGROUPS=n would force
cgroup_psi_enabled()==false (the way function is defined in cgroup.h),
so (CONFIG_CGROUPS=n && cgroup_psi_enabled()==true) is an invalid
configuration. I think adding a comment to your suggestion would make
it more clear.
So your suggestion seems to work. I'll test it and include it in the
next revision. Thanks!
After reworking the code to add a static key I had to expand the
#ifdef CONFIG_CGROUPS section, so I think a code refactoring below
would make sense. It localizes config-specific code and it has the
same exact code for CONFIG_CGROUPS=n and for
cgroup_psi_enabled()==false. WDYT?:
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -181,6 +181,7 @@ struct psi_group psi_system = {
 };

 static void psi_avgs_work(struct work_struct *work);
+static void cgroup_iterator_init(void);

 static void group_init(struct psi_group *group)
 {
@@ -211,6 +212,8 @@ void __init psi_init(void)
                 return;
         }

+        cgroup_iterator_init();
+
         psi_period = jiffies_to_nsecs(PSI_FREQ);
         group_init(&psi_system);
 }
@@ -742,11 +745,31 @@ static void psi_group_change(struct psi_group
*group, int cpu,
                 schedule_delayed_work(&group->avgs_work, PSI_FREQ);
 }

-static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
+static inline struct psi_group *sys_group_iterator(struct task_struct *task,
+                                                   void **iter)
 {
+        *iter = &psi_system;
+        return &psi_system;
+}
+
 #ifdef CONFIG_CGROUPS
+
+DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);
+
+static void cgroup_iterator_init(void)
+{
+        if (!cgroup_psi_enabled())
+                static_branch_enable(&psi_cgroups_disabled);
+}
+
+static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
+{
         struct cgroup *cgroup = NULL;

+        /* Skip to psi_system if per-cgroup accounting is disabled */
+        if (static_branch_unlikely(&psi_cgroups_disabled))
+                return *iter ? NULL : sys_group_iterator(task, iter);
+
         if (!*iter)
                 cgroup = task->cgroups->dfl_cgrp;
         else if (*iter == &psi_system)
@@ -758,14 +781,20 @@ static struct psi_group *iterate_groups(struct
task_struct *task, void **iter)
                 *iter = cgroup;
                 return cgroup_psi(cgroup);
         }
-#else
-        if (*iter)
-                return NULL;
-#endif
-        *iter = &psi_system;
-        return &psi_system;
+
+        return sys_group_iterator(task, iter);
 }

+#else /* CONFIG_CGROUPS */
+static inline void cgroup_iterator_init(void) {}
+
+static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
+{
+        return *iter ? NULL : sys_group_iterator(task, iter);
+}
+
+#endif /* CONFIG_CGROUPS */
+

quoted
quoted
--
To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help