Inter-revision diff: patch 1

Comparing v8 (message) to v4 (message)

--- v8
+++ v4
@@ -8,31 +8,39 @@
 threads in the core that share the L1 cache, translation cache and
 instruction data flow.
 
-This patch
-
-     1) Defines the helper function to parse the contents of
-     "ibm,thread-groups".
-
-     2) On boot, it parses the "ibm,thread-groups" property and caches
-     the CPU-threads sharing the L1 cache in a per-cpu variable named
-     cpu_l1_cache_map.
-
-     3) Initializes a global variable named "has_big_cores" on
-     big-core systems.
-
-     4) Each time a CPU is onlined, it initializes the
-     cpu_smallcore_mask which contains the online siblings of the
-     CPU that share the L1 cache with this CPU.
+This patch defines the helper function to parse the contents of
+"ibm,thread-groups" and a new structure to contain the parsed output.
+
+The patch also creates the sysfs file named "small_core_siblings" that
+returns the physical ids of the threads in the core that share the L1
+cache, translation cache and instruction data flow.
 
 Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
 ---
- arch/powerpc/include/asm/cputhreads.h |   2 +
- arch/powerpc/include/asm/smp.h        |   6 +
- arch/powerpc/kernel/smp.c             | 221 ++++++++++++++++++++++++++++++++++
- 3 files changed, 229 insertions(+)
-
+ Documentation/ABI/testing/sysfs-devices-system-cpu |   8 ++
+ arch/powerpc/include/asm/cputhreads.h              |  22 +++
+ arch/powerpc/kernel/setup-common.c                 | 154 +++++++++++++++++++++
+ arch/powerpc/kernel/sysfs.c                        |  35 +++++
+ 4 files changed, 219 insertions(+)
+
+diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
+index 9c5e7732..41adf1d 100644
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -487,3 +487,11 @@ Description:	Information about CPU vulnerabilities
+ 		"Not affected"	  CPU is not affected by the vulnerability
+ 		"Vulnerable"	  CPU is affected and no mitigation in effect
+ 		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
++
++What: 		/sys/devices/system/cpu/cpu[0-9]+/small_core_siblings
++Date:		24-Jul-2018
++KernelVersion:	v4.18.0
++Contact:	Gautham R. Shenoy <ego@linux.vnet.ibm.com>
++Description:	List of Physical ids of CPUs which share the L1 cache,
++		translation cache and instruction data-flow with this CPU.
++Values:		Comma separated list of decimal integers.
 diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
-index d71a909..deb99fd 100644
+index d71a909..33226d7 100644
 --- a/arch/powerpc/include/asm/cputhreads.h
 +++ b/arch/powerpc/include/asm/cputhreads.h
 @@ -23,11 +23,13 @@
@@ -49,52 +57,11 @@
  #define threads_core_mask	(*get_cpu_mask(0))
  #endif
  
-diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
-index 95b66a0..4439893 100644
---- a/arch/powerpc/include/asm/smp.h
-+++ b/arch/powerpc/include/asm/smp.h
-@@ -100,6 +100,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
- DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
- DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
- DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
-+DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
- 
- static inline struct cpumask *cpu_sibling_mask(int cpu)
- {
-@@ -116,6 +117,11 @@ static inline struct cpumask *cpu_l2_cache_mask(int cpu)
- 	return per_cpu(cpu_l2_cache_map, cpu);
+@@ -69,12 +71,32 @@ static inline cpumask_t cpu_online_cores_map(void)
+ 	return cpu_thread_mask_to_cores(cpu_online_mask);
  }
  
-+static inline struct cpumask *cpu_smallcore_mask(int cpu)
-+{
-+	return per_cpu(cpu_smallcore_map, cpu);
-+}
-+
- extern int cpu_to_core_id(int cpu);
- 
- /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
-diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
-index 61c1fad..15095110 100644
---- a/arch/powerpc/kernel/smp.c
-+++ b/arch/powerpc/kernel/smp.c
-@@ -74,14 +74,32 @@
- #endif
- 
- struct thread_info *secondary_ti;
-+bool has_big_cores;
- 
- DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
-+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
- DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
- DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
- 
- EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
- EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
- EXPORT_PER_CPU_SYMBOL(cpu_core_map);
-+EXPORT_SYMBOL_GPL(has_big_cores);
-+
 +#define MAX_THREAD_LIST_SIZE	8
-+#define THREAD_GROUP_SHARE_L1   1
 +struct thread_groups {
 +	unsigned int property;
 +	unsigned int nr_groups;
@@ -102,30 +69,57 @@
 +	unsigned int thread_list[MAX_THREAD_LIST_SIZE];
 +};
 +
-+/*
-+ * On big-cores system, cpu_l1_cache_map for each CPU corresponds to
-+ * the set of its siblings that share the L1-cache.
-+ */
-+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
- 
- /* SMP operations for this machine */
- struct smp_ops_t *smp_ops;
-@@ -674,6 +692,184 @@ static void set_cpus_unrelated(int i, int j,
- }
+ #ifdef CONFIG_SMP
+ int cpu_core_index_of_thread(int cpu);
+ int cpu_first_thread_of_core(int core);
++int parse_thread_groups(struct device_node *dn, struct thread_groups *tg);
++int get_cpu_thread_group_start(int cpu, struct thread_groups *tg);
+ #else
+ static inline int cpu_core_index_of_thread(int cpu) { return cpu; }
+ static inline int cpu_first_thread_of_core(int core) { return core; }
++static inline int parse_thread_groups(struct device_node *dn,
++				      struct thread_groups *tg)
++{
++	return -ENODATA;
++}
++
++static inline int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
++{
++	return -1;
++}
  #endif
+ 
+ static inline int cpu_thread_in_core(int cpu)
+diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
+index 40b44bb..989edc1 100644
+--- a/arch/powerpc/kernel/setup-common.c
++++ b/arch/powerpc/kernel/setup-common.c
+@@ -402,10 +402,12 @@ void __init check_for_initrd(void)
+ #ifdef CONFIG_SMP
+ 
+ int threads_per_core, threads_per_subcore, threads_shift;
++bool has_big_cores;
+ cpumask_t threads_core_mask;
+ EXPORT_SYMBOL_GPL(threads_per_core);
+ EXPORT_SYMBOL_GPL(threads_per_subcore);
+ EXPORT_SYMBOL_GPL(threads_shift);
++EXPORT_SYMBOL_GPL(has_big_cores);
+ EXPORT_SYMBOL_GPL(threads_core_mask);
+ 
+ static void __init cpu_init_thread_core_maps(int tpc)
+@@ -433,6 +435,152 @@ static void __init cpu_init_thread_core_maps(int tpc)
+ 
+ u32 *cpu_to_phys_id = NULL;
  
 +/*
 + * parse_thread_groups: Parses the "ibm,thread-groups" device tree
 + *                      property for the CPU device node @dn and stores
 + *                      the parsed output in the thread_groups
-+ *                      structure @tg if the ibm,thread-groups[0]
-+ *                      matches @property.
++ *                      structure @tg.
 + *
 + * @dn: The device node of the CPU device.
 + * @tg: Pointer to a thread group structure into which the parsed
-+ *      output of "ibm,thread-groups" is stored.
-+ * @property: The property of the thread-group that the caller is
-+ *            interested in.
++ *     output of "ibm,thread-groups" is stored.
 + *
 + * ibm,thread-groups[0..N-1] array defines which group of threads in
 + * the CPU-device node can be grouped together based on the property.
@@ -155,10 +149,10 @@
 + * -ENODATA if property does not have a value, and -EOVERFLOW if the
 + * property data isn't large enough.
 + */
-+static int parse_thread_groups(struct device_node *dn,
-+			       struct thread_groups *tg,
-+			       unsigned int property)
-+{
++int parse_thread_groups(struct device_node *dn,
++			struct thread_groups *tg)
++{
++	unsigned int nr_groups, threads_per_group, property;
 +	int i;
 +	u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
 +	u32 *thread_list;
@@ -167,31 +161,67 @@
 +
 +	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
 +					 thread_group_array, 3);
++
 +	if (ret)
-+		return ret;
-+
-+	tg->property = thread_group_array[0];
-+	tg->nr_groups = thread_group_array[1];
-+	tg->threads_per_group = thread_group_array[2];
-+	if (tg->property != property ||
-+	    tg->nr_groups < 1 ||
-+	    tg->threads_per_group < 1)
-+		return -ENODATA;
-+
-+	total_threads = tg->nr_groups * tg->threads_per_group;
++		goto out_err;
++
++	property = thread_group_array[0];
++	nr_groups = thread_group_array[1];
++	threads_per_group = thread_group_array[2];
++	total_threads = nr_groups * threads_per_group;
 +
 +	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
 +					 thread_group_array,
 +					 3 + total_threads);
 +	if (ret)
-+		return ret;
++		goto out_err;
 +
 +	thread_list = &thread_group_array[3];
 +
 +	for (i = 0 ; i < total_threads; i++)
 +		tg->thread_list[i] = thread_list[i];
 +
++	tg->property = property;
++	tg->nr_groups = nr_groups;
++	tg->threads_per_group = threads_per_group;
++
 +	return 0;
++out_err:
++	tg->property = 0;
++	tg->nr_groups = 0;
++	tg->threads_per_group = 0;
++	return ret;
++}
++
++/*
++ * dt_has_big_core : Parses the device tree property
++ *		    "ibm,thread-groups" for device node pointed by @dn
++ *		    and stores the parsed output in the structure
++ *		    pointed to by @tg.  Then checks if the output in
++ *		    @tg corresponds to a big-core.
++ *
++ * @dn: Device node pointer of the CPU node being checked for a
++ *      big-core.
++ * @tg: Pointer to thread_groups struct in which parsed output of
++ *      "ibm,thread-groups" is recorded.
++ *
++ * Returns true if the @dn points to a big-core.
++ * Returns false if there is an error in parsing "ibm,thread-groups"
++ * or the parsed output doesn't correspond to a big-core.
++ */
++static inline bool dt_has_big_core(struct device_node *dn,
++				   struct thread_groups *tg)
++{
++	if (parse_thread_groups(dn, tg))
++		return false;
++
++	if (tg->property != 1)
++		return false;
++
++	if (tg->nr_groups < 1)
++		return false;
++
++	return true;
 +}
 +
 +/*
@@ -208,7 +238,7 @@
 + * Returns -1 if cpu doesn't belong to any of the groups pointed to by
 + * tg->thread_list.
 + */
-+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
++int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
 +{
 +	int hw_cpu_id = get_hard_smp_processor_id(cpu);
 +	int i, j;
@@ -227,123 +257,95 @@
 +	return -1;
 +}
 +
-+static int init_cpu_l1_cache_map(int cpu)
-+
-+{
-+	struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ /**
+  * setup_cpu_maps - initialize the following cpu maps:
+  *                  cpu_possible_mask
+@@ -457,6 +605,7 @@ void __init smp_setup_cpu_maps(void)
+ 	int cpu = 0;
+ 	int nthreads = 1;
+ 
++	has_big_cores = true;
+ 	DBG("smp_setup_cpu_maps()\n");
+ 
+ 	cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
+@@ -467,6 +616,7 @@ void __init smp_setup_cpu_maps(void)
+ 		const __be32 *intserv;
+ 		__be32 cpu_be;
+ 		int j, len;
++		struct thread_groups tg;
+ 
+ 		DBG("  * %pOF...\n", dn);
+ 
+@@ -505,6 +655,10 @@ void __init smp_setup_cpu_maps(void)
+ 			cpu++;
+ 		}
+ 
++		if (has_big_cores && !dt_has_big_core(dn, &tg)) {
++			has_big_cores = false;
++		}
++
+ 		if (cpu >= nr_cpu_ids) {
+ 			of_node_put(dn);
+ 			break;
+diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
+index 755dc98..f5717de 100644
+--- a/arch/powerpc/kernel/sysfs.c
++++ b/arch/powerpc/kernel/sysfs.c
+@@ -18,6 +18,7 @@
+ #include <asm/smp.h>
+ #include <asm/pmc.h>
+ #include <asm/firmware.h>
++#include <asm/cputhreads.h>
+ 
+ #include "cacheinfo.h"
+ #include "setup.h"
+@@ -1025,6 +1026,33 @@ static ssize_t show_physical_id(struct device *dev,
+ }
+ static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
+ 
++static ssize_t show_small_core_siblings(struct device *dev,
++					struct device_attribute *attr,
++					char *buf)
++{
++	struct cpu *cpu = container_of(dev, struct cpu, dev);
++	struct device_node *dn = of_get_cpu_node(cpu->dev.id, NULL);
 +	struct thread_groups tg;
-+
-+	int first_thread = cpu_first_thread_sibling(cpu);
-+	int i, cpu_group_start = -1, err = 0;
-+
-+	if (!dn)
++	int i, j;
++	ssize_t ret = 0;
++
++	if (parse_thread_groups(dn, &tg))
 +		return -ENODATA;
 +
-+	err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
-+	if (err)
-+		goto out;
-+
-+	zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
-+				GFP_KERNEL,
-+				cpu_to_node(cpu));
-+
-+	cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
-+
-+	if (unlikely(cpu_group_start == -1)) {
-+		WARN_ON_ONCE(1);
-+		err = -ENODATA;
-+		goto out;
-+	}
-+
-+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
-+		int i_group_start = get_cpu_thread_group_start(i, &tg);
-+
-+		if (unlikely(i_group_start == -1)) {
-+			WARN_ON_ONCE(1);
-+			err = -ENODATA;
-+			goto out;
-+		}
-+
-+		if (i_group_start == cpu_group_start)
-+			cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
-+	}
-+
-+out:
-+	of_node_put(dn);
-+	return err;
-+}
-+
-+static int init_big_cores(void)
-+{
-+	int cpu;
-+
-+	for_each_possible_cpu(cpu) {
-+		int err = init_cpu_l1_cache_map(cpu);
-+
-+		if (err)
-+			return err;
-+
-+		zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
-+					GFP_KERNEL,
-+					cpu_to_node(cpu));
-+	}
-+
-+	has_big_cores = true;
-+	return 0;
-+}
-+
- void __init smp_prepare_cpus(unsigned int max_cpus)
++	i = get_cpu_thread_group_start(cpu->dev.id, &tg);
++
++	if (i == -1)
++		return -ENODATA;
++
++	for (j = 0; j < tg.threads_per_group - 1; j++)
++		ret += sprintf(buf + ret, "%d,", tg.thread_list[i + j]);
++
++	ret += sprintf(buf + ret, "%d\n", tg.thread_list[i + j]);
++
++	return ret;
++}
++static DEVICE_ATTR(small_core_siblings, 0444, show_small_core_siblings, NULL);
++
+ static int __init topology_init(void)
  {
- 	unsigned int cpu;
-@@ -712,6 +908,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
- 	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
- 	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
- 
-+	init_big_cores();
-+	if (has_big_cores) {
-+		cpumask_set_cpu(boot_cpuid,
-+				cpu_smallcore_mask(boot_cpuid));
-+	}
-+
- 	if (smp_ops && smp_ops->probe)
- 		smp_ops->probe();
- }
-@@ -995,10 +1197,28 @@ static void remove_cpu_from_masks(int cpu)
- 		set_cpus_unrelated(cpu, i, cpu_core_mask);
- 		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
- 		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
-+		if (has_big_cores)
-+			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
+ 	int cpu, r;
+@@ -1048,6 +1076,13 @@ static int __init topology_init(void)
+ 			register_cpu(c, cpu);
+ 
+ 			device_create_file(&c->dev, &dev_attr_physical_id);
++
++			if (has_big_cores) {
++				const struct device_attribute *attr =
++				       &dev_attr_small_core_siblings;
++
++			       device_create_file(&c->dev, attr);
++			}
+ 		}
  	}
- }
- #endif
- 
-+static inline void add_cpu_to_smallcore_masks(int cpu)
-+{
-+	struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
-+	int i, first_thread = cpu_first_thread_sibling(cpu);
-+
-+	if (!has_big_cores)
-+		return;
-+
-+	cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
-+
-+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
-+		if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
-+			set_cpus_related(i, cpu, cpu_smallcore_mask);
-+	}
-+}
-+
- static void add_cpu_to_masks(int cpu)
- {
- 	int first_thread = cpu_first_thread_sibling(cpu);
-@@ -1015,6 +1235,7 @@ static void add_cpu_to_masks(int cpu)
- 		if (cpu_online(i))
- 			set_cpus_related(i, cpu, cpu_sibling_mask);
- 
-+	add_cpu_to_smallcore_masks(cpu);
- 	/*
- 	 * Copy the thread sibling mask into the cache sibling mask
- 	 * and mark any CPUs that share an L2 with this CPU.
+ 	r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online",
 -- 
 1.9.4
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help