Thread (22 messages) 22 messages, 6 authors, 2013-10-29

Re: [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group

From: Peter Zijlstra <peterz@infradead.org>
Date: 2013-10-28 13:50:59
Also in: lkml

On Thu, Oct 24, 2013 at 01:37:38PM +0530, Preeti U Murthy wrote:
quoted hunk ↗ jump to hunk
 kernel/sched/core.c  |    5 +++++
 kernel/sched/fair.c  |   38 ++++++++++++++++++++------------------
 kernel/sched/sched.h |    1 +
 3 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c06b8d3..c540392 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
+DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -5290,6 +5291,10 @@ static void update_top_cache_domain(int cpu)
 
 	sd = lowest_flag_domain(cpu, SD_NUMA);
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
+
+	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+	if (sd)
+		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e9c9549..f66cfd9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6515,16 +6515,16 @@ static inline void nohz_balance_exit_idle(int cpu)
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 
 	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (!sd || !sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 0;
 
+	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -6532,16 +6532,16 @@ unlock:
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 
 	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (!sd || sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 1;
 
+	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
Oh nice, that gets rid of the multiple atomics, and it nicely splits
this nohz logic into per topology groups -- now if only we could split
the rest too :-)
quoted hunk ↗ jump to hunk
@@ -6748,6 +6748,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
+	struct sched_group_power *sgp;
+	int nr_busy;
 
 	if (unlikely(idle_cpu(cpu)))
 		return 0;
@@ -6773,22 +6775,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 		goto need_kick;
 
 	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
+	if (sd) {
+		sgp = sd->groups->sgp;
+		nr_busy = atomic_read(&sgp->nr_busy_cpus);
 
+		if (nr_busy > 1)
 			goto need_kick_unlock;
 	}
OK, so far so good.
+
+	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
+
+	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
+				  sched_domain_span(sd)) < cpu))
+		goto need_kick_unlock;
+
 	rcu_read_unlock();
 	return 0;
This again is a bit sad; most archs will not have SD_ASYM_PACKING set at
all; this means that they all will do a complete (and pointless) sched
domain tree walk here.

It would be much better to also introduce sd_asym and do the analogous
thing to the new sd_busy.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help