Inter-revision diff: patch 9

Comparing v9 (message) to v7 (message)

--- v9
+++ v7
@@ -1,62 +1,69 @@
-Code to add PMU functions required for event initialization,                    
-read, update, add, del etc. for thread IMC PMU. Thread IMC PMUs are used        
-for per-task monitoring.                                                        
-                                                                                
-For each CPU, a page of memory is allocated and is kept static i.e.,            
-these pages will exist till the machine shuts down. The base address of         
-this page is assigned to the ldbar of that cpu. As soon as we do that,          
-the thread IMC counters start running for that cpu and the data of these        
-counters are assigned to the page allocated. But we use this for                
-per-task monitoring. Whenever we start monitoring a task, the event is          
-added onto the task. At that point, we read the initial value of the            
-event. Whenever we stop monitoring the task, the final value is taken           
-and the difference is the event data.                                           
-                                                                                
-Now, a task can move to a different cpu. Suppose a task X is moving from        
-cpu A to cpu B. When the task is scheduled out of A, we get an                  
-event_del for A, and hence, the event data is updated. And, we stop             
-updating X's event data. As soon as X moves on to B, event_add is               
-called for B, and we again update the event_data. And this is how it            
-keeps on updating the event data even when the task is scheduled on to          
-different cpus.                                                                 
-                                                                                
-Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>                        
-Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>                         
-Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>       
+This patch adds the PMU functions required for event initialization,
+read, update, add, del etc. for thread IMC PMU. Thread IMC PMUs are used
+for per-task monitoring. 
+
+For each CPU, a page of memory is allocated and is kept static i.e.,
+these pages will exist till the machine shuts down. The base address of
+this page is assigned to the ldbar of that cpu. As soon as we do that,
+the thread IMC counters start running for that cpu and the data of these
+counters are assigned to the page allocated. But we use this for
+per-task monitoring. Whenever we start monitoring a task, the event is
+added onto the task. At that point, we read the initial value of the
+event. Whenever we stop monitoring the task, the final value is taken
+and the difference is the event data.
+
+Now, a task can move to a different cpu. Suppose a task X is moving from
+cpu A to cpu B. When the task is scheduled out of A, we get an
+event_del for A, and hence, the event data is updated. And, we stop
+updating X's event data. As soon as X moves on to B, event_add is
+called for B, and we again update the event_data. And this is how it
+keeps on updating the event data even when the task is scheduled on to
+different cpus.
+
+Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
+Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
+Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
 ---
  arch/powerpc/include/asm/imc-pmu.h        |   5 +
- arch/powerpc/perf/imc-pmu.c               | 203 ++++++++++++++++++++++++++++--
- arch/powerpc/platforms/powernv/opal-imc.c |   2 +
- 3 files changed, 203 insertions(+), 7 deletions(-)
+ arch/powerpc/perf/imc-pmu.c               | 201 ++++++++++++++++++++++++++++++
+ arch/powerpc/platforms/powernv/opal-imc.c |   3 +
+ 3 files changed, 209 insertions(+)
 
 diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
-index 5cbc61d..63e7a23 100644
+index 6260e61..cc04712 100644
 --- a/arch/powerpc/include/asm/imc-pmu.h
 +++ b/arch/powerpc/include/asm/imc-pmu.h
-@@ -43,6 +43,10 @@
- #define IMC_DTB_COMPAT			"ibm,opal-in-memory-counters"
- #define IMC_DTB_UNIT_COMPAT		"ibm,imc-counters"
+@@ -42,6 +42,7 @@
+  * IMC Core engine expects 8K bytes of memory for counter collection.
+  */
+ #define IMC_CORE_COUNTER_MEM		8192
++#define IMC_THREAD_COUNTER_MEM		8192
+ 
+ /*
+  *Compatbility macros for IMC devices
+@@ -51,6 +52,9 @@
+ #define IMC_DTB_CORE_COMPAT		"ibm,imc-counters-core"
+ #define IMC_DTB_THREAD_COMPAT		"ibm,imc-counters-thread"
  
 +#define THREAD_IMC_LDBAR_MASK           0x0003ffffffffe000
 +#define THREAD_IMC_ENABLE               0x8000000000000000
-+#define IMC_THREAD_COUNTER_MEM		8192
 +
  /*
-  * Structure to hold memory address information for imc units.
-  */
-@@ -105,4 +109,5 @@ extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+  * Structure to hold per chip specific memory address
+  * information for nest pmus. Nest Counter data are exported
+@@ -110,4 +114,5 @@ extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
  extern struct imc_pmu *core_imc_pmu;
- extern int core_imc_control(int operation);
- extern int __init init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr);
+ extern int __init init_imc_pmu(struct imc_events *events,int idx, struct imc_pmu *pmu_ptr);
+ void core_imc_disable(void);
 +void thread_imc_disable(void);
  #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
 diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
-index e67680f..13ff6dc 100644
+index ac69d81..b055748 100644
 --- a/arch/powerpc/perf/imc-pmu.c
 +++ b/arch/powerpc/perf/imc-pmu.c
-@@ -18,6 +18,9 @@
- #include <asm/smp.h>
- #include <linux/string.h>
+@@ -38,6 +38,9 @@ static u64 per_core_pdbar_add[IMC_MAX_CHIPS][IMC_MAX_CORES];
+ static cpumask_t core_imc_cpumask;
+ struct imc_pmu *core_imc_pmu;
  
 +/* Maintains base address for all the cpus */
 +static u64 per_cpu_add[NR_CPUS];
@@ -64,66 +71,12 @@
  /* Needed for sanity check */
  extern u64 nest_max_offset;
  extern u64 core_max_offset;
-@@ -387,18 +390,46 @@ bool is_core_imc_mem_inited(int cpu)
- }
- 
- /*
-- * imc_mem_init : Function to support memory allocation for core imc.
-+ * Allocates a page of memory for each of the online cpus, and, writes the
-+ * physical base address of that page to the LDBAR for that cpu. This starts
-+ * the thread IMC counters.
-+ */
-+static void thread_imc_mem_alloc(int cpu_id)
-+{
-+	u64 ldbar_addr, ldbar_value;
-+	int phys_id = topology_physical_package_id(cpu_id);
-+
-+	per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
-+			(size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);
-+	ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
-+	ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
-+		(u64)THREAD_IMC_ENABLE;
-+	mtspr(SPRN_LDBAR, ldbar_value);
-+}
-+
-+/*
-+ * imc_mem_init : Function to support memory allocation for core and thread imc.
-  */
- static int imc_mem_init(struct imc_pmu *pmu_ptr)
- {
--	int nr_cores;
-+	int nr_cores, cpu;
- 
- 	if (pmu_ptr->imc_counter_mmaped)
- 		return 0;
--	nr_cores = num_present_cpus() / threads_per_core;
--	pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
--	if (!pmu_ptr->mem_info)
--		return -ENOMEM;
-+	switch (pmu_ptr->domain) {
-+	case IMC_DOMAIN_CORE:
-+		nr_cores = num_present_cpus() / threads_per_core;
-+		pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
-+		if (!pmu_ptr->mem_info)
-+			return -ENOMEM;
-+		break;
-+	case IMC_DOMAIN_THREAD:
-+		for_each_online_cpu(cpu)
-+		thread_imc_mem_alloc(cpu);
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
+@@ -482,6 +485,56 @@ static int core_imc_event_init(struct perf_event *event)
  	return 0;
  }
  
-@@ -592,6 +623,73 @@ static int core_imc_event_init(struct perf_event *event)
- 	return 0;
- }
- 
 +static int thread_imc_event_init(struct perf_event *event)
 +{
-+	int rc;
 +	struct task_struct *target;
 +
 +	if (event->attr.type != event->pmu->type)
@@ -144,9 +97,45 @@
 +	if (!target)
 +		return -EINVAL;
 +
-+	if (!is_core_imc_mem_inited(event->cpu))
-+		return -ENODEV;
 +	event->pmu->task_ctx_nr = perf_sw_context;
++	return 0;
++}
++
++static void thread_imc_read_counter(struct perf_event *event)
++{
++	u64 *addr, data;
++	int cpu_id = smp_processor_id();
++
++	addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
++	data = __be64_to_cpu(READ_ONCE(*addr));
++	local64_set(&event->hw.prev_count, data);
++}
++
++static void thread_imc_perf_event_update(struct perf_event *event)
++{
++	u64 counter_prev, counter_new, final_count, *addr;
++	int cpu_id = smp_processor_id();
++
++	addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
++	counter_prev = local64_read(&event->hw.prev_count);
++	counter_new = __be64_to_cpu(READ_ONCE(*addr));
++	final_count = counter_new - counter_prev;
++
++	local64_set(&event->hw.prev_count, counter_new);
++	local64_add(final_count, &event->count);
++}
++
+ static void imc_read_counter(struct perf_event *event)
+ {
+ 	u64 *addr, data;
+@@ -723,6 +776,84 @@ static int core_imc_event_add(struct perf_event *event, int flags)
+ }
+ 
+ 
++static void thread_imc_event_start(struct perf_event *event, int flags)
++{
++	int rc;
++
 +	/*
 +	 * Core pmu units are enabled only when it is used.
 +	 * See if this is triggered for the first time.
@@ -158,51 +147,30 @@
 +		rc = core_imc_control(IMC_COUNTER_ENABLE);
 +		mutex_unlock(&imc_core_reserve);
 +		if (rc)
-+			pr_err("IMC: Unable to start the counters\n");
++			pr_err("IMC: Unbale to start the counters\n");
 +	}
-+	event->destroy = core_imc_counters_release;
-+	return 0;
-+}
-+
-+static void thread_imc_read_counter(struct perf_event *event)
-+{
-+	u64 *addr, data;
-+	int cpu_id = smp_processor_id();
-+
-+	addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
-+	data = __be64_to_cpu(READ_ONCE(*addr));
-+	local64_set(&event->hw.prev_count, data);
-+}
-+
-+static void thread_imc_perf_event_update(struct perf_event *event)
-+{
-+	u64 counter_prev, counter_new, final_count, *addr;
-+	int cpu_id = smp_processor_id();
-+
-+	addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
-+	counter_prev = local64_read(&event->hw.prev_count);
-+	counter_new = __be64_to_cpu(READ_ONCE(*addr));
-+	final_count = counter_new - counter_prev;
-+
-+	local64_set(&event->hw.prev_count, counter_new);
-+	local64_add(final_count, &event->count);
-+}
-+
- static void imc_read_counter(struct perf_event *event)
- {
- 	u64 *addr, data;
-@@ -653,6 +751,53 @@ static int imc_event_add(struct perf_event *event, int flags)
- 	return 0;
- }
- 
-+static void thread_imc_event_start(struct perf_event *event, int flags)
-+{
 +	thread_imc_read_counter(event);
 +}
 +
 +static void thread_imc_event_stop(struct perf_event *event, int flags)
 +{
++	int rc;
++
 +	thread_imc_perf_event_update(event);
++	/*
++	 * See if we need to disable the IMC PMU.
++	 * If no events are currently in use, then we have to take a
++	 * mutex to ensure that we don't race with another task doing
++	 * enable or disable the core counters.
++	 */
++	if (atomic_dec_return(&core_events) == 0) {
++		mutex_lock(&imc_core_reserve);
++		rc = core_imc_control(IMC_COUNTER_DISABLE);
++		mutex_unlock(&imc_core_reserve);
++		if (rc)
++			pr_err("IMC: Disable counters failed\n");
++
++	}
 +}
 +
 +static void thread_imc_event_del(struct perf_event *event, int flags)
@@ -245,11 +213,10 @@
  /* update_pmu_ops : Populate the appropriate operations for "pmu" */
  static int update_pmu_ops(struct imc_pmu *pmu)
  {
-@@ -673,7 +818,26 @@ static int update_pmu_ops(struct imc_pmu *pmu)
- 	pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+@@ -749,6 +880,19 @@ static int update_pmu_ops(struct imc_pmu *pmu)
  	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
  	pmu->pmu.attr_groups = pmu->attr_groups;
--
+ 
 +	if (pmu->domain == IMC_DOMAIN_THREAD) {
 +		pmu->pmu.event_init = thread_imc_event_init;
 +		pmu->pmu.start = thread_imc_event_start;
@@ -261,19 +228,12 @@
 +		pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
 +		pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
 +		pmu->pmu.sched_task = thread_imc_pmu_sched_task;
-+
-+		/*
-+		 * Since thread_imc does not have any CPUMASK attr,
-+		 * this may drop the "events" attr all together.
-+		 * So swap the IMC_EVENT_ATTR slot with IMC_CPUMASK_ATTR.
-+		 */
-+		pmu->attr_groups[IMC_CPUMASK_ATTR] = pmu->attr_groups[IMC_EVENT_ATTR];
-+		pmu->attr_groups[IMC_EVENT_ATTR] = NULL;
 +	}
++
  	return 0;
  }
  
-@@ -734,6 +898,27 @@ static int update_events_in_group(struct imc_events *events,
+@@ -809,6 +953,56 @@ static int update_events_in_group(struct imc_events *events,
  	return 0;
  }
  
@@ -288,22 +248,61 @@
 +	on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
 +}
 +
++static void cleanup_thread_imc_memory(void *dummy)
++{
++	int cpu_id = smp_processor_id();
++	u64 addr = per_cpu_add[cpu_id];
++
++	/* Only if the address is non-zero, shall we free it */
++	if (addr)
++		free_pages(addr, 0);
++}
++
 +static void cleanup_all_thread_imc_memory(void)
 +{
-+	int i;
-+
-+	for_each_online_cpu(i) {
-+		if (per_cpu_add[i])
-+			free_pages(per_cpu_add[i], 0);
-+	}
++	on_each_cpu(cleanup_thread_imc_memory, NULL, 1);
++}
++
++/*
++ * Allocates a page of memory for each of the online cpus, and, writes the
++ * physical base address of that page to the LDBAR for that cpu. This starts
++ * the thread IMC counters.
++ */
++static void thread_imc_mem_alloc(void *dummy)
++{
++	u64 ldbar_addr, ldbar_value;
++	int cpu_id = smp_processor_id();
++	int phys_id = topology_physical_package_id(smp_processor_id());
++
++	per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
++			(size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);
++	ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
++	ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
++		(u64)THREAD_IMC_ENABLE;
++	mtspr(SPRN_LDBAR, ldbar_value);
++}
++
++void thread_imc_cpu_init(void)
++{
++	on_each_cpu(thread_imc_mem_alloc, NULL, 1);
 +}
 +
  /*
   * init_imc_pmu : Setup and register the IMC pmu device.
   *
-@@ -799,5 +984,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
+@@ -836,6 +1030,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
+ 		if (ret)
+ 			return ret;
+ 		break;
++	case IMC_DOMAIN_THREAD:
++		thread_imc_cpu_init();
++		break;
+ 	default:
+ 		return -1;  /* Unknown domain */
+ 	}
+@@ -868,5 +1065,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
  	if (pmu_ptr->domain == IMC_DOMAIN_CORE)
- 		cleanup_all_core_imc_memory(pmu_ptr);
+ 		cleanup_all_core_imc_memory();
  
 +	/* For thread_imc, we have allocated memory, we need to free it */
 +	if (pmu_ptr->domain == IMC_DOMAIN_THREAD)
@@ -312,13 +311,14 @@
  	return ret;
  }
 diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
-index 9bcf58b..478078f 100644
+index 940f6b9..e36722b 100644
 --- a/arch/powerpc/platforms/powernv/opal-imc.c
 +++ b/arch/powerpc/platforms/powernv/opal-imc.c
-@@ -543,6 +543,8 @@ static void opal_imc_counters_shutdown(struct platform_device *pdev)
+@@ -582,6 +582,9 @@ static void opal_imc_counters_shutdown(struct platform_device *pdev)
  {
  	/* Disable the IMC Core functions */
- 	core_imc_control(IMC_COUNTER_DISABLE);
+ 	core_imc_disable();
++
 +	/* Disable the IMC Thread functions */
 +	thread_imc_disable();
  }
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help