--- v9
+++ v7
@@ -1,62 +1,69 @@
-Code to add PMU functions required for event initialization,
-read, update, add, del etc. for thread IMC PMU. Thread IMC PMUs are used
-for per-task monitoring.
-
-For each CPU, a page of memory is allocated and is kept static i.e.,
-these pages will exist till the machine shuts down. The base address of
-this page is assigned to the ldbar of that cpu. As soon as we do that,
-the thread IMC counters start running for that cpu and the data of these
-counters are assigned to the page allocated. But we use this for
-per-task monitoring. Whenever we start monitoring a task, the event is
-added onto the task. At that point, we read the initial value of the
-event. Whenever we stop monitoring the task, the final value is taken
-and the difference is the event data.
-
-Now, a task can move to a different cpu. Suppose a task X is moving from
-cpu A to cpu B. When the task is scheduled out of A, we get an
-event_del for A, and hence the event data is updated, and we stop
-updating X's event data. As soon as X moves on to B, event_add is
-called for B, and we again update the event_data. And this is how it
-keeps on updating the event data even when the task is scheduled on to
-different cpus.
-
-Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
-Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
-Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
+This patch adds the PMU functions required for event initialization,
+read, update, add, del etc. for thread IMC PMU. Thread IMC PMUs are used
+for per-task monitoring.
+
+For each CPU, a page of memory is allocated and is kept static i.e.,
+these pages will exist till the machine shuts down. The base address of
+this page is assigned to the ldbar of that cpu. As soon as we do that,
+the thread IMC counters start running for that cpu and the data of these
+counters are assigned to the page allocated. But we use this for
+per-task monitoring. Whenever we start monitoring a task, the event is
+added onto the task. At that point, we read the initial value of the
+event. Whenever we stop monitoring the task, the final value is taken
+and the difference is the event data.
+
+Now, a task can move to a different cpu. Suppose a task X is moving from
+cpu A to cpu B. When the task is scheduled out of A, we get an
+event_del for A, and hence the event data is updated, and we stop
+updating X's event data. As soon as X moves on to B, event_add is
+called for B, and we again update the event_data. And this is how it
+keeps on updating the event data even when the task is scheduled on to
+different cpus.
+
+Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
+Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
+Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/imc-pmu.h | 5 +
- arch/powerpc/perf/imc-pmu.c | 203 ++++++++++++++++++++++++++++--
- arch/powerpc/platforms/powernv/opal-imc.c | 2 +
- 3 files changed, 203 insertions(+), 7 deletions(-)
+ arch/powerpc/perf/imc-pmu.c | 201 ++++++++++++++++++++++++++++++
+ arch/powerpc/platforms/powernv/opal-imc.c | 3 +
+ 3 files changed, 209 insertions(+)
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
-index 5cbc61d..63e7a23 100644
+index 6260e61..cc04712 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
-@@ -43,6 +43,10 @@
- #define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
- #define IMC_DTB_UNIT_COMPAT "ibm,imc-counters"
+@@ -42,6 +42,7 @@
+ * IMC Core engine expects 8K bytes of memory for counter collection.
+ */
+ #define IMC_CORE_COUNTER_MEM 8192
++#define IMC_THREAD_COUNTER_MEM 8192
+
+ /*
+ *Compatbility macros for IMC devices
+@@ -51,6 +52,9 @@
+ #define IMC_DTB_CORE_COMPAT "ibm,imc-counters-core"
+ #define IMC_DTB_THREAD_COMPAT "ibm,imc-counters-thread"
+#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000
+#define THREAD_IMC_ENABLE 0x8000000000000000
-+#define IMC_THREAD_COUNTER_MEM 8192
+
/*
- * Structure to hold memory address information for imc units.
- */
-@@ -105,4 +109,5 @@ extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+ * Structure to hold per chip specific memory address
+ * information for nest pmus. Nest Counter data are exported
+@@ -110,4 +114,5 @@ extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
extern struct imc_pmu *core_imc_pmu;
- extern int core_imc_control(int operation);
- extern int __init init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr);
+ extern int __init init_imc_pmu(struct imc_events *events,int idx, struct imc_pmu *pmu_ptr);
+ void core_imc_disable(void);
+void thread_imc_disable(void);
#endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
-index e67680f..13ff6dc 100644
+index ac69d81..b055748 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
-@@ -18,6 +18,9 @@
- #include <asm/smp.h>
- #include <linux/string.h>
+@@ -38,6 +38,9 @@ static u64 per_core_pdbar_add[IMC_MAX_CHIPS][IMC_MAX_CORES];
+ static cpumask_t core_imc_cpumask;
+ struct imc_pmu *core_imc_pmu;
+/* Maintains base address for all the cpus */
+static u64 per_cpu_add[NR_CPUS];
@@ -64,66 +71,12 @@
/* Needed for sanity check */
extern u64 nest_max_offset;
extern u64 core_max_offset;
-@@ -387,18 +390,46 @@ bool is_core_imc_mem_inited(int cpu)
- }
-
- /*
-- * imc_mem_init : Function to support memory allocation for core imc.
-+ * Allocates a page of memory for each of the online cpus, and, writes the
-+ * physical base address of that page to the LDBAR for that cpu. This starts
-+ * the thread IMC counters.
-+ */
-+static void thread_imc_mem_alloc(int cpu_id)
-+{
-+ u64 ldbar_addr, ldbar_value;
-+ int phys_id = topology_physical_package_id(cpu_id);
-+
-+ per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
-+ (size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);
-+ ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
-+ ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
-+ (u64)THREAD_IMC_ENABLE;
-+ mtspr(SPRN_LDBAR, ldbar_value);
-+}
-+
-+/*
-+ * imc_mem_init : Function to support memory allocation for core and thread imc.
- */
- static int imc_mem_init(struct imc_pmu *pmu_ptr)
- {
-- int nr_cores;
-+ int nr_cores, cpu;
-
- if (pmu_ptr->imc_counter_mmaped)
- return 0;
-- nr_cores = num_present_cpus() / threads_per_core;
-- pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
-- if (!pmu_ptr->mem_info)
-- return -ENOMEM;
-+ switch (pmu_ptr->domain) {
-+ case IMC_DOMAIN_CORE:
-+ nr_cores = num_present_cpus() / threads_per_core;
-+ pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
-+ if (!pmu_ptr->mem_info)
-+ return -ENOMEM;
-+ break;
-+ case IMC_DOMAIN_THREAD:
-+ for_each_online_cpu(cpu)
-+ thread_imc_mem_alloc(cpu);
-+ break;
-+ default:
-+ return -EINVAL;
-+ }
+@@ -482,6 +485,56 @@ static int core_imc_event_init(struct perf_event *event)
return 0;
}
-@@ -592,6 +623,73 @@ static int core_imc_event_init(struct perf_event *event)
- return 0;
- }
-
+static int thread_imc_event_init(struct perf_event *event)
+{
-+ int rc;
+ struct task_struct *target;
+
+ if (event->attr.type != event->pmu->type)
@@ -144,9 +97,45 @@
+ if (!target)
+ return -EINVAL;
+
-+ if (!is_core_imc_mem_inited(event->cpu))
-+ return -ENODEV;
+ event->pmu->task_ctx_nr = perf_sw_context;
++ return 0;
++}
++
++static void thread_imc_read_counter(struct perf_event *event)
++{
++ u64 *addr, data;
++ int cpu_id = smp_processor_id();
++
++ addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
++ data = __be64_to_cpu(READ_ONCE(*addr));
++ local64_set(&event->hw.prev_count, data);
++}
++
++static void thread_imc_perf_event_update(struct perf_event *event)
++{
++ u64 counter_prev, counter_new, final_count, *addr;
++ int cpu_id = smp_processor_id();
++
++ addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
++ counter_prev = local64_read(&event->hw.prev_count);
++ counter_new = __be64_to_cpu(READ_ONCE(*addr));
++ final_count = counter_new - counter_prev;
++
++ local64_set(&event->hw.prev_count, counter_new);
++ local64_add(final_count, &event->count);
++}
++
+ static void imc_read_counter(struct perf_event *event)
+ {
+ u64 *addr, data;
+@@ -723,6 +776,84 @@ static int core_imc_event_add(struct perf_event *event, int flags)
+ }
+
+
++static void thread_imc_event_start(struct perf_event *event, int flags)
++{
++ int rc;
++
+ /*
+ * Core pmu units are enabled only when it is used.
+ * See if this is triggered for the first time.
@@ -158,51 +147,30 @@
+ rc = core_imc_control(IMC_COUNTER_ENABLE);
+ mutex_unlock(&imc_core_reserve);
+ if (rc)
-+ pr_err("IMC: Unable to start the counters\n");
++ pr_err("IMC: Unbale to start the counters\n");
+ }
-+ event->destroy = core_imc_counters_release;
-+ return 0;
-+}
-+
-+static void thread_imc_read_counter(struct perf_event *event)
-+{
-+ u64 *addr, data;
-+ int cpu_id = smp_processor_id();
-+
-+ addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
-+ data = __be64_to_cpu(READ_ONCE(*addr));
-+ local64_set(&event->hw.prev_count, data);
-+}
-+
-+static void thread_imc_perf_event_update(struct perf_event *event)
-+{
-+ u64 counter_prev, counter_new, final_count, *addr;
-+ int cpu_id = smp_processor_id();
-+
-+ addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
-+ counter_prev = local64_read(&event->hw.prev_count);
-+ counter_new = __be64_to_cpu(READ_ONCE(*addr));
-+ final_count = counter_new - counter_prev;
-+
-+ local64_set(&event->hw.prev_count, counter_new);
-+ local64_add(final_count, &event->count);
-+}
-+
- static void imc_read_counter(struct perf_event *event)
- {
- u64 *addr, data;
-@@ -653,6 +751,53 @@ static int imc_event_add(struct perf_event *event, int flags)
- return 0;
- }
-
-+static void thread_imc_event_start(struct perf_event *event, int flags)
-+{
+ thread_imc_read_counter(event);
+}
+
+static void thread_imc_event_stop(struct perf_event *event, int flags)
+{
++ int rc;
++
+ thread_imc_perf_event_update(event);
++ /*
++ * See if we need to disable the IMC PMU.
++ * If no events are currently in use, then we have to take a
++ * mutex to ensure that we don't race with another task doing
++ * enable or disable the core counters.
++ */
++ if (atomic_dec_return(&core_events) == 0) {
++ mutex_lock(&imc_core_reserve);
++ rc = core_imc_control(IMC_COUNTER_DISABLE);
++ mutex_unlock(&imc_core_reserve);
++ if (rc)
++ pr_err("IMC: Disable counters failed\n");
++
++ }
+}
+
+static void thread_imc_event_del(struct perf_event *event, int flags)
@@ -245,11 +213,10 @@
/* update_pmu_ops : Populate the appropriate operations for "pmu" */
static int update_pmu_ops(struct imc_pmu *pmu)
{
-@@ -673,7 +818,26 @@ static int update_pmu_ops(struct imc_pmu *pmu)
- pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+@@ -749,6 +880,19 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
pmu->pmu.attr_groups = pmu->attr_groups;
--
+
+ if (pmu->domain == IMC_DOMAIN_THREAD) {
+ pmu->pmu.event_init = thread_imc_event_init;
+ pmu->pmu.start = thread_imc_event_start;
@@ -261,19 +228,12 @@
+ pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
+ pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
+ pmu->pmu.sched_task = thread_imc_pmu_sched_task;
-+
-+ /*
-+ * Since thread_imc does not have any CPUMASK attr,
-+ * this may drop the "events" attr all together.
-+ * So swap the IMC_EVENT_ATTR slot with IMC_CPUMASK_ATTR.
-+ */
-+ pmu->attr_groups[IMC_CPUMASK_ATTR] = pmu->attr_groups[IMC_EVENT_ATTR];
-+ pmu->attr_groups[IMC_EVENT_ATTR] = NULL;
+ }
++
return 0;
}
-@@ -734,6 +898,27 @@ static int update_events_in_group(struct imc_events *events,
+@@ -809,6 +953,56 @@ static int update_events_in_group(struct imc_events *events,
return 0;
}
@@ -288,22 +248,61 @@
+ on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
+}
+
++static void cleanup_thread_imc_memory(void *dummy)
++{
++ int cpu_id = smp_processor_id();
++ u64 addr = per_cpu_add[cpu_id];
++
++ /* Only if the address is non-zero, shall we free it */
++ if (addr)
++ free_pages(addr, 0);
++}
++
+static void cleanup_all_thread_imc_memory(void)
+{
-+ int i;
-+
-+ for_each_online_cpu(i) {
-+ if (per_cpu_add[i])
-+ free_pages(per_cpu_add[i], 0);
-+ }
++ on_each_cpu(cleanup_thread_imc_memory, NULL, 1);
++}
++
++/*
++ * Allocates a page of memory for each of the online cpus, and, writes the
++ * physical base address of that page to the LDBAR for that cpu. This starts
++ * the thread IMC counters.
++ */
++static void thread_imc_mem_alloc(void *dummy)
++{
++ u64 ldbar_addr, ldbar_value;
++ int cpu_id = smp_processor_id();
++ int phys_id = topology_physical_package_id(smp_processor_id());
++
++ per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
++ (size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);
++ ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
++ ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
++ (u64)THREAD_IMC_ENABLE;
++ mtspr(SPRN_LDBAR, ldbar_value);
++}
++
++void thread_imc_cpu_init(void)
++{
++ on_each_cpu(thread_imc_mem_alloc, NULL, 1);
+}
+
/*
* init_imc_pmu : Setup and register the IMC pmu device.
*
-@@ -799,5 +984,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
+@@ -836,6 +1030,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
+ if (ret)
+ return ret;
+ break;
++ case IMC_DOMAIN_THREAD:
++ thread_imc_cpu_init();
++ break;
+ default:
+ return -1; /* Unknown domain */
+ }
+@@ -868,5 +1065,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
if (pmu_ptr->domain == IMC_DOMAIN_CORE)
- cleanup_all_core_imc_memory(pmu_ptr);
+ cleanup_all_core_imc_memory();
+ /* For thread_imc, we have allocated memory, we need to free it */
+ if (pmu_ptr->domain == IMC_DOMAIN_THREAD)
@@ -312,13 +311,14 @@
return ret;
}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
-index 9bcf58b..478078f 100644
+index 940f6b9..e36722b 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
-@@ -543,6 +543,8 @@ static void opal_imc_counters_shutdown(struct platform_device *pdev)
+@@ -582,6 +582,9 @@ static void opal_imc_counters_shutdown(struct platform_device *pdev)
{
/* Disable the IMC Core functions */
- core_imc_control(IMC_COUNTER_DISABLE);
+ core_imc_disable();
++
+ /* Disable the IMC Thread functions */
+ thread_imc_disable();
}