Thread (18 messages) 18 messages, 6 authors, 2021-05-15

Re: [PATCH v2 1/3] sched/fair: Call newidle_balance() from balance_callback on PREEMPT_RT

From: Vincent Guittot <vincent.guittot@linaro.org>
Date: 2021-05-05 12:14:57
Also in: lkml

On Thu, 29 Apr 2021 at 01:28, Scott Wood [off-list ref] wrote:
This is required in order to be able to enable interrupts in the next
patch.  This is limited to PREEMPT_RT to avoid adding potentially
measurable overhead to the non-RT case (requiring a double switch when
pulling a task onto a newly idle cpu).
IIUC, only the newidle_balance is a problem and not the idle load
balance that runs softirq. In this case, why not skipping
newidle_balance entirely in case of preempt_rt and kick an idle load
balance instead as you switch to idle thread context anyway

quoted hunk ↗ jump to hunk
update_misfit_status() is factored out for the PREEMPT_RT case, to ensure
that the misfit status is kept consistent before dropping the lock.

Signed-off-by: Scott Wood <redacted>
---
v2: Use a balance callback, and limit to PREEMPT_RT

 kernel/sched/fair.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 794c2cb945f8..ff369c38a5b5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5660,6 +5660,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)

 #ifdef CONFIG_SMP

+static const bool newidle_balance_in_callback = IS_ENABLED(CONFIG_PREEMPT_RT);
+static DEFINE_PER_CPU(struct callback_head, rebalance_head);
+
 /* Working cpumask for: load_balance, load_balance_newidle. */
 DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
@@ -10549,7 +10552,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
  *     0 - failed, no new tasks
  *   > 0 - success, new (fair) tasks present
  */
-static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+static int do_newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 {
        unsigned long next_balance = jiffies + HZ;
        int this_cpu = this_rq->cpu;
@@ -10557,7 +10560,9 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
        int pulled_task = 0;
        u64 curr_cost = 0;

-       update_misfit_status(NULL, this_rq);
+       if (!newidle_balance_in_callback)
+               update_misfit_status(NULL, this_rq);
+
        /*
         * We must set idle_stamp _before_ calling idle_balance(), such that we
         * measure the duration of idle_balance() as idle time.
@@ -10576,7 +10581,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
         * further scheduler activity on it and we're being very careful to
         * re-start the picking loop.
         */
-       rq_unpin_lock(this_rq, rf);
+       if (!newidle_balance_in_callback)
+               rq_unpin_lock(this_rq, rf);

        if (this_rq->avg_idle < sysctl_sched_migration_cost ||
            !READ_ONCE(this_rq->rd->overload)) {
@@ -10655,11 +10661,31 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
        if (pulled_task)
                this_rq->idle_stamp = 0;

-       rq_repin_lock(this_rq, rf);
+       if (!newidle_balance_in_callback)
+               rq_repin_lock(this_rq, rf);

        return pulled_task;
 }

+static void newidle_balance_cb(struct rq *this_rq)
+{
+       update_rq_clock(this_rq);
+       do_newidle_balance(this_rq, NULL);
+}
+
+static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+{
+       if (newidle_balance_in_callback) {
+               update_misfit_status(NULL, this_rq);
+               queue_balance_callback(this_rq,
+                                      &per_cpu(rebalance_head, this_rq->cpu),
+                                      newidle_balance_cb);
+               return 0;
+       }
+
+       return do_newidle_balance(this_rq, rf);
+}
+
 /*
  * run_rebalance_domains is triggered when needed from the scheduler tick.
  * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
--
2.27.0
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help