Re: [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group
From: Peter Zijlstra <peterz@infradead.org>
Date: 2013-10-28 13:50:59
Also in:
lkml
On Thu, Oct 24, 2013 at 01:37:38PM +0530, Preeti U Murthy wrote:
quoted hunk ↗ jump to hunk
 kernel/sched/core.c  |  5 +++++
 kernel/sched/fair.c  | 38 ++++++++++++++++++++------------------
 kernel/sched/sched.h |  1 +
 3 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c06b8d3..c540392 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
+DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 static void update_top_cache_domain(int cpu)
 {
@@ -5290,6 +5291,10 @@ static void update_top_cache_domain(int cpu)
 	sd = lowest_flag_domain(cpu, SD_NUMA);
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
+
+	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+	if (sd)
+		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
 }
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e9c9549..f66cfd9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6515,16 +6515,16 @@ static inline void nohz_balance_exit_idle(int cpu)
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 	if (!sd || !sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 0;
+	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -6532,16 +6532,16 @@ unlock:
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 	if (!sd || sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 1;
+	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
Oh nice, that gets rid of the multiple atomics, and it nicely splits this nohz logic into per-topology groups -- now if only we could split the rest too :-)
quoted hunk ↗ jump to hunk
@@ -6748,6 +6748,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
+	struct sched_group_power *sgp;
+	int nr_busy;
 	if (unlikely(idle_cpu(cpu)))
 		return 0;
@@ -6773,22 +6775,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 		goto need_kick;
 	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
+	if (sd) {
+		sgp = sd->groups->sgp;
+		nr_busy = atomic_read(&sgp->nr_busy_cpus);
+		if (nr_busy > 1)
 			goto need_kick_unlock;
 	}
OK, so far so good.
+
+	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
+
+	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
+				  sched_domain_span(sd)) < cpu))
+		goto need_kick_unlock;
+
 	rcu_read_unlock();
 	return 0;
This again is a bit sad; most archs will not have SD_ASYM_PACKING set at all, which means they will all do a complete (and pointless) sched domain tree walk here. It would be much better to also introduce sd_asym and handle it analogously to the new sd_busy.