Thread: 27 messages, 4 authors, 2022-08-15
STALE1419d REVIEWED: 1 (0M)
Revisions (2)
  1. v3 [diff vs current]
  2. v4 current

[PATCH net-next V4 1/3] sched/topology: Add NUMA-based CPUs spread API

From: Tariq Toukan <tariqt@nvidia.com>
Date: 2022-07-28 19:12:35
Also in: lkml
Subsystem: scheduler, the rest · Maintainers: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot, Linus Torvalds

Implement and expose an API that sets the spread of CPUs based on distance,
given a NUMA node.  Fall back to the legacy logic that uses
cpumask_local_spread() when the distance-based spread fails.

This logic can be used by device drivers to prefer some remote CPUs over
others.

Reviewed-by: Gal Pressman <redacted>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 include/linux/sched/topology.h |  5 ++++
 kernel/sched/topology.c        | 49 ++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 56cffe42abbc..a49167c2a0e5 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -210,6 +210,7 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl);
 # define SD_INIT_NAME(type)
 #endif
 
+void sched_cpus_set_spread(int node, u16 *cpus, int ncpus);
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -231,6 +232,10 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
 	return true;
 }
 
+static inline void sched_cpus_set_spread(int node, u16 *cpus, int ncpus)
+{
+	memset(cpus, 0, ncpus * sizeof(*cpus));
+}
 #endif	/* !CONFIG_SMP */
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05b6c2ad90b9..157aef862c04 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2067,8 +2067,57 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
 	return found;
 }
 
+static bool sched_cpus_spread_by_distance(int node, u16 *cpus, int ncpus)
+{
+	cpumask_var_t cpumask;
+	int first, i;
+
+	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return false;
+
+	cpumask_copy(cpumask, cpu_online_mask);
+
+	first = cpumask_first(cpumask_of_node(node));
+
+	for (i = 0; i < ncpus; i++) {
+		int cpu;
+
+		cpu = sched_numa_find_closest(cpumask, first);
+		if (cpu >= nr_cpu_ids) {
+			free_cpumask_var(cpumask);
+			return false;
+		}
+		cpus[i] = cpu;
+		__cpumask_clear_cpu(cpu, cpumask);
+	}
+
+	free_cpumask_var(cpumask);
+	return true;
+}
+#else
+static bool sched_cpus_spread_by_distance(int node, u16 *cpus, int ncpus)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA */
 
+static void sched_cpus_by_local_spread(int node, u16 *cpus, int ncpus)
+{
+	int i;
+
+	for (i = 0; i < ncpus; i++)
+		cpus[i] = cpumask_local_spread(i, node);
+}
+
+void sched_cpus_set_spread(int node, u16 *cpus, int ncpus)
+{
+	bool success = sched_cpus_spread_by_distance(node, cpus, ncpus);
+
+	if (!success)
+		sched_cpus_by_local_spread(node, cpus, ncpus);
+}
+EXPORT_SYMBOL_GPL(sched_cpus_set_spread);
+
 static int __sdt_alloc(const struct cpumask *cpu_map)
 {
 	struct sched_domain_topology_level *tl;
-- 
2.21.0
Keyboard shortcuts
h    back out one level
j    next message in thread
k    previous message in thread
l    drill in
Esc  close help / fold thread tree
?    toggle this help