Thread (15 messages) 15 messages, 3 authors, 2011-08-03

Re: [RFC PATCH V1 5/7] cpuidle: (POWER) cpuidle driver for pSeries

From: Trinabh Gupta <hidden>
Date: 2011-06-21 09:00:57
Also in: lkml


On 06/17/2011 10:06 AM, Benjamin Herrenschmidt wrote:
On Tue, 2011-06-07 at 22:00 +0530, Trinabh Gupta wrote:
quoted
+static int snooze_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	unsigned long in_purr, out_purr;
+	ktime_t kt_before, kt_after;
+	s64 usec_delta;
+
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+	get_lppaca()->donate_dedicated_cpu = 1;
+	in_purr = mfspr(SPRN_PURR);
+
+	kt_before = ktime_get_real();
Don't you want to timestamp before you tell the HV that you are idle ?
Or is the above stuff only polled by phyp when partition interrupts are
enabled ?
Hi Ben,

Yes, timestamp should be before telling HV that we are idle. Thanks
quoted
+	local_irq_enable();
+	set_thread_flag(TIF_POLLING_NRFLAG);
+	while (!need_resched()) {
+		ppc64_runlatch_off();
+		HMT_low();
+		HMT_very_low();
+	}
+	HMT_medium();
+	clear_thread_flag(TIF_POLLING_NRFLAG);
+	smp_mb();
+	local_irq_disable();
+
+	kt_after = ktime_get_real();
+	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
+
+	out_purr = mfspr(SPRN_PURR);
+	get_lppaca()->wait_state_cycles += out_purr - in_purr;
+	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->idle = 0;
+
+	dev->last_residency = (int)usec_delta;
+
+	return index;
+}
+
+static int dedicated_cede_loop(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
+				int index)
+{
+	unsigned long in_purr, out_purr;
+	ktime_t kt_before, kt_after;
+	s64 usec_delta;
+
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+	get_lppaca()->donate_dedicated_cpu = 1;
+	in_purr = mfspr(SPRN_PURR);
+
+	kt_before = ktime_get_real();
There's a bit too much code duplication for my taste here between the
two functions. Not sure if it can be helped, maybe with some inlines
for the prolog/epilogue ... Looks like stuff that's easy to "fix" in one
place and forget the other...
Yes, I agree that there is too much code duplication in these idle
routines; will fix this.

Thanks
-Trinabh
quoted
+	ppc64_runlatch_off();
+	HMT_medium();
+	cede_processor();
+
+	kt_after = ktime_get_real();
+	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
+
+	out_purr = mfspr(SPRN_PURR);
+	get_lppaca()->wait_state_cycles += out_purr - in_purr;
+	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->idle = 0;
+
+	dev->last_residency = (int)usec_delta;
+
+	return index;
+}
+
+static int shared_cede_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	unsigned long in_purr, out_purr;
+	ktime_t kt_before, kt_after;
+	s64 usec_delta;
+
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+	get_lppaca()->donate_dedicated_cpu = 1;
+	in_purr = mfspr(SPRN_PURR);
+
+	kt_before = ktime_get_real();
+	/*
+	 * Yield the processor to the hypervisor.  We return if
+	 * an external interrupt occurs (which are driven prior
+	 * to returning here) or if a prod occurs from another
+	 * processor. When returning here, external interrupts
+	 * are enabled.
+	 */
+	cede_processor();
+
+	kt_after = ktime_get_real();
+
+	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
+
+	out_purr = mfspr(SPRN_PURR);
+	get_lppaca()->wait_state_cycles += out_purr - in_purr;
+	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->idle = 0;
+
+	dev->last_residency = (int)usec_delta;
+
+	return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
+	{ /* Snooze */
+		.name = "snooze",
+		.desc = "snooze",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 0,
+		.target_residency = 0,
+		.enter =&snooze_loop },
+	{ /* CEDE */
+		.name = "CEDE",
+		.desc = "CEDE",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 1,
+		.target_residency = 10,
+		.enter =&dedicated_cede_loop },
+};
+
+/*
+ * States for shared partition case.
+ */
+static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
+	{ /* Shared Cede */
+		.name = "Shared Cede",
+		.desc = "Shared Cede",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 0,
+		.target_residency = 0,
+		.enter =&shared_cede_loop },
+};
+
+int pseries_notify_cpuidle_add_cpu(int cpu)
+{
+	struct cpuidle_device *dev =
+			per_cpu_ptr(pseries_idle_cpuidle_devices, cpu);
+	if (dev&&  cpuidle_get_driver()) {
+		cpuidle_disable_device(dev);
+		cpuidle_enable_device(dev);
+	}
+	return 0;
+}
+
+/*
+ * pseries_idle_cpuidle_driver_init()
+ */
+static int pseries_idle_cpuidle_driver_init(void)
+{
+	int cstate;
+	struct cpuidle_driver *drv =&pseries_idle_driver;
+
+	drv->state_count = 0;
+
+	for (cstate = 0; cstate<  MAX_IDLE_STATE_COUNT; ++cstate) {
+
+		if (cstate>  max_cstate)
+			break;
+
+		/* is the state not enabled? */
+		if (cpuidle_state_table[cstate].enter == NULL)
+			continue;
+
+		drv->states[drv->state_count] =	/* structure copy */
+			cpuidle_state_table[cstate];
+
+		if (cpuidle_state_table == dedicated_states)
+			drv->states[drv->state_count].target_residency =
+				__get_cpu_var(smt_snooze_delay);
+
+		drv->state_count += 1;
+	}
+
+	return 0;
+}
+
+/* pseries_idle_devices_uninit(void)
+ * unregister cpuidle devices and de-allocate memory
+ */
+static void pseries_idle_devices_uninit(void)
+{
+	int i;
+	struct cpuidle_device *dev;
+
+	for_each_possible_cpu(i) {
+		dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
+		cpuidle_unregister_device(dev);
+	}
+
+	free_percpu(pseries_idle_cpuidle_devices);
+	return;
+}
+
+/* pseries_idle_devices_init()
+ * allocate, initialize and register cpuidle device
+ */
+static int pseries_idle_devices_init(void)
+{
+	int i;
+	struct cpuidle_driver *drv =&pseries_idle_driver;
+	struct cpuidle_device *dev;
+
+	pseries_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+	if (pseries_idle_cpuidle_devices == NULL)
+		return -ENOMEM;
+
+	for_each_possible_cpu(i) {
+		dev = per_cpu_ptr(pseries_idle_cpuidle_devices, i);
+		dev->state_count = drv->state_count;
+		dev->cpu = i;
+		if (cpuidle_register_device(dev)) {
+			printk(KERN_DEBUG "cpuidle_register_device %d failed!\n",
+				 i);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * pseries_idle_probe()
+ * Choose state table for shared versus dedicated partition
+ */
+static int pseries_idle_probe(void)
+{
+	if (max_cstate == 0) {
+		printk(KERN_DEBUG "pseries processor idle disabled.\n");
+		return -EPERM;
+	}
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		printk(KERN_DEBUG "Using default idle\n");
+		return -ENODEV;
+	}
+
+	if (get_lppaca()->shared_proc)
+		cpuidle_state_table = shared_states;
+	else
+		cpuidle_state_table = dedicated_states;
+
+	return 0;
+}
+
+static int __init pseries_processor_idle_init(void)
+{
+	int retval;
+
+	retval = pseries_idle_probe();
+	if (retval)
+		return retval;
+
+	pseries_idle_cpuidle_driver_init();
+	retval = cpuidle_register_driver(&pseries_idle_driver);
+	if (retval) {
+		printk(KERN_DEBUG "Registration of pseries driver failed.\n");
+		return retval;
+	}
+
+	retval = pseries_idle_devices_init();
+	if (retval) {
+		pseries_idle_devices_uninit();
+		cpuidle_unregister_driver(&pseries_idle_driver);
+		return retval;
+	}
+
+	printk(KERN_DEBUG "pseries_idle_driver registered\n");
+
+	return 0;
+}
+
+device_initcall(pseries_processor_idle_init);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index e9f6d28..7c60380 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -56,4 +56,7 @@ extern struct device_node *dlpar_configure_connector(u32);
  extern int dlpar_attach_node(struct device_node *);
  extern int dlpar_detach_node(struct device_node *);

+/* Snooze Delay, pseries_idle */
+DECLARE_PER_CPU(long, smt_snooze_delay);
+
  #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 593acce..6893a0c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -584,9 +584,6 @@ static int __init pSeries_probe(void)
  	return 1;
  }

-
-DECLARE_PER_CPU(long, smt_snooze_delay);
-
  static void pseries_dedicated_idle_sleep(void)
  {
  	unsigned int cpu = smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index fbffd7e..2e46883 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -150,6 +150,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
  	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
  	set_default_offline_state(cpu);
  #endif
+	pseries_notify_cpuidle_add_cpu(cpu);
  }

  static int __devinit smp_pSeries_kick_cpu(int nr)
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help