Re: [PATCH 1/2] sched: Add per_cpu cluster domain info and cpus_share_cluster API
From: Gautham R. Shenoy <hidden>
Date: 2021-12-22 10:24:41
Also in:
lkml
Hello Yicong, Barry, On Wed, Dec 15, 2021 at 12:11:48PM +0800, Yicong Yang wrote:
quoted hunk ↗ jump to hunk
From: Barry Song <redacted> Add per-cpu cluster domain info and cpus_share_cluster() API. This is the preparation for the optimization of select_idle_cpu() on platforms with cluster scheduler level. Signed-off-by: Barry Song <redacted> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> --- include/linux/sched/sd_flags.h | 5 +++++ include/linux/sched/topology.h | 8 +++++++- kernel/sched/core.c | 12 ++++++++++++ kernel/sched/sched.h | 2 ++ kernel/sched/topology.c | 9 +++++++++ 5 files changed, 35 insertions(+), 1 deletion(-)diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 57bde66d95f7..0f732bcfbb2c 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h@@ -109,6 +109,11 @@ SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) */ SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) +/* + * Set up for cluster domains sharing resources such as llc tags or l2 + */ +SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS) + /* * Domain members share CPU package resources (i.e. caches) *diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index c07bfa2d80f2..78c3a94fae66 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h@@ -45,7 +45,7 @@ static inline int cpu_smt_flags(void) #ifdef CONFIG_SCHED_CLUSTER static inline int cpu_cluster_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_CLUSTER | SD_SHARE_PKG_RESOURCES;
On non-cluster systems, there would be only one group at the at the CLS domain. Since SD_CLUSTER is also tagged with SDF_NEEDS_GROUP, it would need the presence of two groups. Thus, on such non-cluster systems, the CLS domain will continue to be degenerated in favour of the SMT domain.
quoted hunk ↗ jump to hunk
} #endif@@ -177,6 +177,7 @@ cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); bool cpus_share_cache(int this_cpu, int that_cpu); +bool cpus_share_cluster(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void);@@ -230,6 +231,11 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) return true; } +static inline bool cpus_share_cluster(int this_cpu, int that_cpu) +{ + return true; +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3c9b0fda64ac..11f9b25c3068 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c@@ -3732,6 +3732,18 @@ bool cpus_share_cache(int this_cpu, int that_cpu) return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } +/* + * On non-Cluster machine this function works same with cpus_share_cache() + * as sd_cluster_id equals to sd_llc_id. + */
Ok, so on a non-cluster machine, a cluster is trivially defined to be a single core (this is reflected in what the cpu_cluster_mask() shows up on such machines). However, in cpus_share_cluster(), we are upgrading the definition of a cluster to be group of CPUs that share the LLC. This is inconsistent with the original definition. Is there a way to avoid this inconsistency ?
quoted hunk ↗ jump to hunk
+bool cpus_share_cluster(int this_cpu, int that_cpu) +{ + if (this_cpu == that_cpu) + return true; + + return per_cpu(sd_cluster_id, this_cpu) == per_cpu(sd_cluster_id, that_cpu); +} + static inline bool ttwu_queue_cond(int cpu, int wake_flags) { /*diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0e66749486e7..ddd29879ad40 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h@@ -1763,7 +1763,9 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc); DECLARE_PER_CPU(int, sd_llc_size); DECLARE_PER_CPU(int, sd_llc_id); +DECLARE_PER_CPU(int, sd_cluster_id); DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); +DECLARE_PER_CPU(struct sched_domain __rcu *, sd_cluster); DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa); DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index d201a7052a29..5642df384904 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c@@ -644,6 +644,8 @@ static void destroy_sched_domains(struct sched_domain *sd) DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc); DEFINE_PER_CPU(int, sd_llc_size); DEFINE_PER_CPU(int, sd_llc_id); +DEFINE_PER_CPU(int, sd_cluster_id); +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_cluster); DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa); DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);@@ -669,6 +671,12 @@ static void update_top_cache_domain(int cpu) per_cpu(sd_llc_id, cpu) = id; rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); + sd = lowest_flag_domain(cpu, SD_CLUSTER); + if (sd) + id = cpumask_first(sched_domain_span(sd)); + rcu_assign_pointer(per_cpu(sd_cluster, cpu), sd); + per_cpu(sd_cluster_id, cpu) = id; + sd = lowest_flag_domain(cpu, SD_NUMA); rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);@@ -1514,6 +1522,7 @@ static unsigned long __read_mostly *sched_numa_onlined_nodes; */ #define TOPOLOGY_SD_FLAGS \ (SD_SHARE_CPUCAPACITY | \ + SD_CLUSTER | \ SD_SHARE_PKG_RESOURCES | \ SD_NUMA | \ SD_ASYM_PACKING)-- 2.33.0
-- Thanks and Regards gautham. _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel