Re: [PATCH 4/4] sched: Rename arch_update_thermal_pressure into... | linux-trace-kernel

Re: [PATCH 4/4] sched: Rename arch_update_thermal_pressure into arch_update_hw_pressure

From: Lukasz Luba <lukasz.luba@arm.com>
Date: 2023-12-14 08:30:42
Also in: linux-arm-kernel, linux-arm-msm, linux-pm, lkml

On 12/12/23 14:27, Vincent Guittot wrote:

Now that cpufreq provides a pressure value to the scheduler, rename
arch_update_thermal_pressure into hw pressure to reflect that it returns
a pressure applied by HW with a high frequency and which needs filtering.

I would elaborate on the meaning of 'filtering' here. Something like:
'... high frequency and which needs filtering to smooth the signal and
get an average value. That reflects the available capacity of the CPU
over a longer period'

quoted hunk ↗ jump to hunk

This pressure is not always related to thermal mitigation but can also be
generated by max current limitation as an example.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
  arch/arm/include/asm/topology.h               |  6 ++---
  arch/arm64/include/asm/topology.h             |  6 ++---
  drivers/base/arch_topology.c                  | 26 +++++++++----------
  drivers/cpufreq/qcom-cpufreq-hw.c             |  4 +--
  include/linux/arch_topology.h                 |  8 +++---
  include/linux/sched/topology.h                |  8 +++---
  .../{thermal_pressure.h => hw_pressure.h}     | 14 +++++-----
  include/trace/events/sched.h                  |  2 +-
  init/Kconfig                                  | 12 ++++-----
  kernel/sched/core.c                           |  8 +++---
  kernel/sched/fair.c                           | 12 ++++-----
  kernel/sched/pelt.c                           | 18 ++++++-------
  kernel/sched/pelt.h                           | 16 ++++++------
  kernel/sched/sched.h                          |  4 +--
  14 files changed, 72 insertions(+), 72 deletions(-)
  rename include/trace/events/{thermal_pressure.h => hw_pressure.h} (55%)

diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 853c4f81ba4a..e175e8596b5d 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h

@@ -22,9 +22,9 @@
  /* Enable topology flag updates */
  #define arch_update_cpu_topology topology_update_cpu_topology
  
-/* Replace task scheduler's default thermal pressure API */
-#define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_update_thermal_pressure	topology_update_thermal_pressure
+/* Replace task scheduler's default hw pressure API */
+#define arch_scale_hw_pressure topology_get_hw_pressure
+#define arch_update_hw_pressure	topology_update_hw_pressure
  
  #else

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a323b109b9c4..a427650bdfba 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h

@@ -35,9 +35,9 @@ void update_freq_counters_refs(void);
  /* Enable topology flag updates */
  #define arch_update_cpu_topology topology_update_cpu_topology
  
-/* Replace task scheduler's default thermal pressure API */
-#define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_update_thermal_pressure	topology_update_thermal_pressure
+/* Replace task scheduler's default hw pressure API */

s/hw/HW/ ?

quoted hunk ↗ jump to hunk

+#define arch_scale_hw_pressure topology_get_hw_pressure
+#define arch_update_hw_pressure	topology_update_hw_pressure
  
  #include <asm-generic/topology.h>

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 0906114963ff..3d8dc9d5c3ad 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c

@@ -22,7 +22,7 @@
  #include <linux/units.h>
  
  #define CREATE_TRACE_POINTS
-#include <trace/events/thermal_pressure.h>
+#include <trace/events/hw_pressure.h>
  
  static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
  static struct cpumask scale_freq_counters_mask;

@@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
  	per_cpu(cpu_scale, cpu) = capacity;
  }
  
-DEFINE_PER_CPU(unsigned long, thermal_pressure);
+DEFINE_PER_CPU(unsigned long, hw_pressure);
  
  /**
- * topology_update_thermal_pressure() - Update thermal pressure for CPUs
+ * topology_update_hw_pressure() - Update hw pressure for CPUs

same here: HW?

   * @cpus        : The related CPUs for which capacity has been reduced
   * @capped_freq : The maximum allowed frequency that CPUs can run at
   *
- * Update the value of thermal pressure for all @cpus in the mask. The
+ * Update the value of hw pressure for all @cpus in the mask. The

HW?

   * cpumask should include all (online+offline) affected CPUs, to avoid
   * operating on stale data when hot-plug is used for some CPUs. The
   * @capped_freq reflects the currently allowed max CPUs frequency due to
- * thermal capping. It might be also a boost frequency value, which is bigger
+ * hw capping. It might be also a boost frequency value, which is bigger

HW?

   * than the internal 'capacity_freq_ref' max frequency. In such case the
   * pressure value should simply be removed, since this is an indication that
- * there is no thermal throttling. The @capped_freq must be provided in kHz.
+ * there is no hw throttling. The @capped_freq must be provided in kHz.

HW?

quoted hunk ↗ jump to hunk

   */
-void topology_update_thermal_pressure(const struct cpumask *cpus,
+void topology_update_hw_pressure(const struct cpumask *cpus,
  				      unsigned long capped_freq)
  {
-	unsigned long max_capacity, capacity, th_pressure;
+	unsigned long max_capacity, capacity, hw_pressure;
  	u32 max_freq;
  	int cpu;

@@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
  
  	/*
  	 * Handle properly the boost frequencies, which should simply clean
-	 * the thermal pressure value.
+	 * the hw pressure value.

HW?

quoted hunk ↗ jump to hunk

  	 */
  	if (max_freq <= capped_freq)
  		capacity = max_capacity;
  	else
  		capacity = mult_frac(max_capacity, capped_freq, max_freq);
  
-	th_pressure = max_capacity - capacity;
+	hw_pressure = max_capacity - capacity;
  
-	trace_thermal_pressure_update(cpu, th_pressure);
+	trace_hw_pressure_update(cpu, hw_pressure);
  
  	for_each_cpu(cpu, cpus)
-		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+		WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
  }
-EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
+EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
  
  static ssize_t cpu_capacity_show(struct device *dev,
  				 struct device_attribute *attr,

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 70b0f21968a0..a619202ba81d 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c

@@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
  
  	throttled_freq = freq_hz / HZ_PER_KHZ;
  
-	/* Update thermal pressure (the boost frequencies are accepted) */
-	arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
+	/* Update hw pressure (the boost frequencies are accepted) */

HW?

+	arch_update_hw_pressure(policy->related_cpus, throttled_freq);

[snip]

quoted hunk ↗ jump to hunk

  
  	if (housekeeping_cpu(cpu, HK_TYPE_TICK))

@@ -5669,8 +5669,8 @@ void scheduler_tick(void)
  	rq_lock(rq, &rf);
  
  	update_rq_clock(rq);
-	thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
-	update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
+	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
+	update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);

We switch to task clock here, could you tell me why?
Don't we have to maintain the boot command parameter for the shift?

quoted hunk ↗ jump to hunk

  	curr->sched_class->task_tick(rq, curr, 0);
  	if (sched_feat(LATENCY_WARN))
  		resched_latency = cpu_resched_latency(rq);

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 11d3be829302..07050955d6e0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -4936,7 +4936,7 @@ static inline unsigned long get_actual_cpu_capacity(int cpu)
  {
  	unsigned long capacity = arch_scale_cpu_capacity(cpu);
  
-	capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
+	capacity -= max(hw_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
  
  	return capacity;
  }

@@ -4968,7 +4968,7 @@ static inline int util_fits_cpu(unsigned long util,
  	 * Similarly if a task is capped to arch_scale_cpu_capacity(little_cpu), it
  	 * should fit a little cpu even if there's some pressure.
  	 *
-	 * Only exception is for thermal pressure since it has a direct impact
+	 * Only exception is for hw or cpufreq pressure since it has a direct impact

HW?

quoted hunk ↗ jump to hunk

  	 * on available OPP of the system.
  	 *
  	 * We honour it for uclamp_min only as a drop in performance level

@@ -9224,7 +9224,7 @@ static inline bool others_have_blocked(struct rq *rq)
  	if (READ_ONCE(rq->avg_dl.util_avg))
  		return true;
  
-	if (thermal_load_avg(rq))
+	if (hw_load_avg(rq))
  		return true;
  
  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
@@ -9256,7 +9256,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
  {
  	const struct sched_class *curr_class;
  	u64 now = rq_clock_pelt(rq);
-	unsigned long thermal_pressure;
+	unsigned long hw_pressure;
  	bool decayed;
  
  	/*
@@ -9265,11 +9265,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
  	 */
  	curr_class = rq->curr->sched_class;
  
-	thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
+	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
  
  	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
  		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
-		  update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) |
+		  update_hw_load_avg(now, rq, hw_pressure) |

Here also the rq_clock_thermal() is not used anymore. Shouldn't we
remove the rq_clock_thermal() if not used anymore (I cannot see this in
the patch)?

  		  update_irq_load_avg(rq, 0);
  
  	if (others_have_blocked(rq))

[snip]

quoted hunk ↗ jump to hunk

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e58a54bda77d..7eaf186d071e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h

@@ -1078,8 +1078,8 @@ struct rq {
  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
  	struct sched_avg	avg_irq;
  #endif
-#ifdef CONFIG_SCHED_THERMAL_PRESSURE
-	struct sched_avg	avg_thermal;
+#ifdef CONFIG_SCHED_HW_PRESSURE
+	struct sched_avg	avg_hw;
  #endif
  	u64			idle_stamp;
  	u64			avg_idle;

I don't see that rq_clock_thermal() removal in this header.
Maybe I'm missing some patch?

BTW, I like the name 'HW pressure' for this information/signal.

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help