--- v7
+++ v3
@@ -1,123 +1,851 @@
-In the current code, when a thread wakes up in the reset vector, some
-of the state restore code and the check for whether the thread needs
-to branch to KVM are duplicated. Reorder the code so that this
-duplication is avoided.
+idle_power7.S handles idle entry/exit for POWER7 and POWER8 and, as of
+the next patch, for POWER9 as well. Rename the file to a
+non-hardware-specific name.
-At a high level, this is what the change looks like:
-
-Before this patch -
-power7_wakeup_tb_loss:
- restore hypervisor state
- if (thread needed by kvm)
- goto kvm_start_guest
- restore nvgprs, cr, pc
- rfid to process context
-
-power7_wakeup_loss:
- restore nvgprs, cr, pc
- rfid to process context
-
-reset vector:
- if (waking from deep idle states)
- goto power7_wakeup_tb_loss
- else
- if (thread needed by kvm)
- goto kvm_start_guest
- goto power7_wakeup_loss
-
-After this patch -
-power7_wakeup_tb_loss:
- restore hypervisor state
- return
-
-power7_restore_hyp_resource():
- if (waking from deep idle states)
- goto power7_wakeup_tb_loss
- return
-
-power7_wakeup_loss:
- restore nvgprs, cr, pc
- rfid to process context
-
-reset vector:
- power7_restore_hyp_resource()
- if (thread needed by kvm)
- goto kvm_start_guest
- goto power7_wakeup_loss
-
-Reviewed-by: Paul Mackerras <paulus@samba.org>
-Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com>
---
-- No changes since v3
+Changes in v3:
+==============
+ - Instead of moving a few common functions from idle_power7.S to
+ idle_power_common.S, rename idle_power7.S to idle_power_common.S.
-Changes in v3:
-=============
-- Retaining GET_PACA(r13) in System Reset vector instead of moving it
- to power7_restore_hyp_resource
-- Added comments indicating entry conditions for power7_restore_hyp_resource
-- Improved comments around return statements
+ arch/powerpc/kernel/Makefile | 2 +-
+ arch/powerpc/kernel/idle_power7.S | 527 --------------------------------
+ arch/powerpc/kernel/idle_power_common.S | 527 ++++++++++++++++++++++++++++++++
+ 3 files changed, 528 insertions(+), 528 deletions(-)
+ delete mode 100644 arch/powerpc/kernel/idle_power7.S
+ create mode 100644 arch/powerpc/kernel/idle_power_common.S
- arch/powerpc/kernel/exceptions-64s.S | 28 ++------------
- arch/powerpc/kernel/idle_power7.S | 72 +++++++++++++++++++++---------------
- 2 files changed, 46 insertions(+), 54 deletions(-)
-
-diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
-index 4c94406..4a74d6a 100644
---- a/arch/powerpc/kernel/exceptions-64s.S
-+++ b/arch/powerpc/kernel/exceptions-64s.S
-@@ -107,25 +107,9 @@ BEGIN_FTR_SECTION
- beq 9f
-
- cmpwi cr3,r13,2
--
+diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
+index 2da380f..99116da 100644
+--- a/arch/powerpc/kernel/Makefile
++++ b/arch/powerpc/kernel/Makefile
+@@ -47,7 +47,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
+ obj-$(CONFIG_PPC64) += vdso64/
+ obj-$(CONFIG_ALTIVEC) += vecemu.o
+ obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
+-obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
++obj-$(CONFIG_PPC_P7_NAP) += idle_power_common.o
+ procfs-y := proc_powerpc.o
+ obj-$(CONFIG_PROC_FS) += $(procfs-y)
+ rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
+diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
+deleted file mode 100644
+index db59613..0000000
+--- a/arch/powerpc/kernel/idle_power7.S
++++ /dev/null
+@@ -1,527 +0,0 @@
+-/*
+- * This file contains the power_save function for Power7 CPUs.
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-#include <linux/threads.h>
+-#include <asm/processor.h>
+-#include <asm/page.h>
+-#include <asm/cputable.h>
+-#include <asm/thread_info.h>
+-#include <asm/ppc_asm.h>
+-#include <asm/asm-offsets.h>
+-#include <asm/ppc-opcode.h>
+-#include <asm/hw_irq.h>
+-#include <asm/kvm_book3s_asm.h>
+-#include <asm/opal.h>
+-#include <asm/cpuidle.h>
+-#include <asm/book3s/64/mmu-hash.h>
+-
+-#undef DEBUG
+-
+-/*
+- * Use unused space in the interrupt stack to save and restore
+- * registers for winkle support.
+- */
+-#define _SDR1 GPR3
+-#define _RPR GPR4
+-#define _SPURR GPR5
+-#define _PURR GPR6
+-#define _TSCR GPR7
+-#define _DSCR GPR8
+-#define _AMOR GPR9
+-#define _WORT GPR10
+-#define _WORC GPR11
+-
+-/* Idle state entry routines */
+-
+-#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+- /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+- std r0,0(r1); \
+- ptesync; \
+- ld r0,0(r1); \
+-1: cmp cr0,r0,r0; \
+- bne 1b; \
+- IDLE_INST; \
+- b .
+-
+- .text
+-
+-/*
+- * Used by threads when the lock bit of core_idle_state is set.
+- * Threads will spin in HMT_LOW until the lock bit is cleared.
+- * r14 - pointer to core_idle_state
+- * r15 - used to load contents of core_idle_state
+- */
+-
+-core_idle_lock_held:
+- HMT_LOW
+-3: lwz r15,0(r14)
+- andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT
+- bne 3b
+- HMT_MEDIUM
+- lwarx r15,0,r14
+- blr
+-
+-/*
+- * Pass requested state in r3:
+- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
+- *
+- * To check IRQ_HAPPENED in r4
+- * 0 - don't check
+- * 1 - check
+- */
+-_GLOBAL(power7_powersave_common)
+- /* Use r3 to pass state nap/sleep/winkle */
+- /* NAP is a state loss, we create a regs frame on the
+- * stack, fill it up with the state we care about and
+- * stick a pointer to it in PACAR1. We really only
+- * need to save PC, some CR bits and the NV GPRs,
+- * but for now an interrupt frame will do.
+- */
+- mflr r0
+- std r0,16(r1)
+- stdu r1,-INT_FRAME_SIZE(r1)
+- std r0,_LINK(r1)
+- std r0,_NIP(r1)
+-
+- /* Hard disable interrupts */
+- mfmsr r9
+- rldicl r9,r9,48,1
+- rotldi r9,r9,16
+- mtmsrd r9,1 /* hard-disable interrupts */
+-
+- /* Check if something happened while soft-disabled */
+- lbz r0,PACAIRQHAPPENED(r13)
+- andi. r0,r0,~PACA_IRQ_HARD_DIS@l
+- beq 1f
+- cmpwi cr0,r4,0
+- beq 1f
+- addi r1,r1,INT_FRAME_SIZE
+- ld r0,16(r1)
+- li r3,0 /* Return 0 (no nap) */
+- mtlr r0
+- blr
+-
+-1: /* We mark irqs hard disabled as this is the state we'll
+- * be in when returning and we need to tell arch_local_irq_restore()
+- * about it
+- */
+- li r0,PACA_IRQ_HARD_DIS
+- stb r0,PACAIRQHAPPENED(r13)
+-
+- /* We haven't lost state ... yet */
+- li r0,0
+- stb r0,PACA_NAPSTATELOST(r13)
+-
+- /* Continue saving state */
+- SAVE_GPR(2, r1)
+- SAVE_NVGPRS(r1)
+- mfcr r4
+- std r4,_CCR(r1)
+- std r9,_MSR(r1)
+- std r1,PACAR1(r13)
+-
+- /*
+- * Go to real mode to do the nap, as required by the architecture.
+- * Also, we need to be in real mode before setting hwthread_state,
+- * because as soon as we do that, another thread can switch
+- * the MMU context to the guest.
+- */
+- LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+- li r6, MSR_RI
+- andc r6, r9, r6
+- LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+- mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
+- mtspr SPRN_SRR0, r7
+- mtspr SPRN_SRR1, r5
+- rfid
+-
+- .globl power7_enter_nap_mode
+-power7_enter_nap_mode:
+-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+- /* Tell KVM we're napping */
+- li r4,KVM_HWTHREAD_IN_NAP
+- stb r4,HSTATE_HWTHREAD_STATE(r13)
+-#endif
+- stb r3,PACA_THREAD_IDLE_STATE(r13)
+- cmpwi cr3,r3,PNV_THREAD_SLEEP
+- bge cr3,2f
+- IDLE_STATE_ENTER_SEQ(PPC_NAP)
+- /* No return */
+-2:
+- /* Sleep or winkle */
+- lbz r7,PACA_THREAD_MASK(r13)
+- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+-lwarx_loop1:
+- lwarx r15,0,r14
+-
+- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+- bnel core_idle_lock_held
+-
+- andc r15,r15,r7 /* Clear thread bit */
+-
+- andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+-
+-/*
+- * If cr0 = 0, then current thread is the last thread of the core entering
+- * sleep. Last thread needs to execute the hardware bug workaround code if
+- * required by the platform.
+- * Make the workaround call unconditionally here. The below branch call is
+- * patched out when the idle states are discovered if the platform does not
+- * require it.
+- */
+-.global pnv_fastsleep_workaround_at_entry
+-pnv_fastsleep_workaround_at_entry:
+- beq fastsleep_workaround_at_entry
+-
+- stwcx. r15,0,r14
+- bne- lwarx_loop1
+- isync
+-
+-common_enter: /* common code for all the threads entering sleep or winkle */
+- bgt cr3,enter_winkle
+- IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+-
+-fastsleep_workaround_at_entry:
+- ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+- stwcx. r15,0,r14
+- bne- lwarx_loop1
+- isync
+-
+- /* Fast sleep workaround */
+- li r3,1
+- li r4,1
+- li r0,OPAL_CONFIG_CPU_IDLE_STATE
+- bl opal_call_realmode
+-
+- /* Clear Lock bit */
+- li r0,0
+- lwsync
+- stw r0,0(r14)
+- b common_enter
+-
+-enter_winkle:
+- /*
+- * Note all register i.e per-core, per-subcore or per-thread is saved
+- * here since any thread in the core might wake up first
+- */
+- mfspr r3,SPRN_SDR1
+- std r3,_SDR1(r1)
+- mfspr r3,SPRN_RPR
+- std r3,_RPR(r1)
+- mfspr r3,SPRN_SPURR
+- std r3,_SPURR(r1)
+- mfspr r3,SPRN_PURR
+- std r3,_PURR(r1)
+- mfspr r3,SPRN_TSCR
+- std r3,_TSCR(r1)
+- mfspr r3,SPRN_DSCR
+- std r3,_DSCR(r1)
+- mfspr r3,SPRN_AMOR
+- std r3,_AMOR(r1)
+- mfspr r3,SPRN_WORT
+- std r3,_WORT(r1)
+- mfspr r3,SPRN_WORC
+- std r3,_WORC(r1)
+- IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+-
+-_GLOBAL(power7_idle)
+- /* Now check if user or arch enabled NAP mode */
+- LOAD_REG_ADDRBASE(r3,powersave_nap)
+- lwz r4,ADDROFF(powersave_nap)(r3)
+- cmpwi 0,r4,0
+- beqlr
+- li r3, 1
+- /* fall through */
+-
+-_GLOBAL(power7_nap)
+- mr r4,r3
+- li r3,PNV_THREAD_NAP
+- b power7_powersave_common
+- /* No return */
+-
+-_GLOBAL(power7_sleep)
+- li r3,PNV_THREAD_SLEEP
+- li r4,1
+- b power7_powersave_common
+- /* No return */
+-
+-_GLOBAL(power7_winkle)
+- li r3,3
+- li r4,1
+- b power7_powersave_common
+- /* No return */
+-
+-#define CHECK_HMI_INTERRUPT \
+- mfspr r0,SPRN_SRR1; \
+-BEGIN_FTR_SECTION_NESTED(66); \
+- rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \
+-FTR_SECTION_ELSE_NESTED(66); \
+- rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \
+-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
+- cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
+- bne 20f; \
+- /* Invoke opal call to handle hmi */ \
+- ld r2,PACATOC(r13); \
+- ld r1,PACAR1(r13); \
+- std r3,ORIG_GPR3(r1); /* Save original r3 */ \
+- li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
+- bl opal_call_realmode; \
+- ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
+-20: nop;
+-
+-
+-/*
+- * Called from reset vector. Check whether we have woken up with
+- * hypervisor state loss. If yes, restore hypervisor state and return
+- * back to reset vector.
+- *
+- * r13 - Contents of HSPRG0
+- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+- */
+-_GLOBAL(power7_restore_hyp_resource)
- /*
- * Check if last bit of HSPGR0 is set. This indicates whether we are
- * waking up from winkle.
- */
- GET_PACA(r13)
- clrldi r5,r13,63
- clrrdi r13,r13,1
- cmpwi cr4,r5,1
- mtspr SPRN_HSPRG0,r13
-+ bl power7_restore_hyp_resource
-
+-
- lbz r0,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r0,PNV_THREAD_NAP
-- bgt cr2,8f /* Either sleep or Winkle */
--
-- /* Waking up from nap should not cause hypervisor state loss */
+- bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */
+-
+- /*
+- * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
+- * up from nap. At this stage CR3 shouldn't contains 'gt' since that
+- * indicates we are waking with hypervisor state loss from nap.
+- */
- bgt cr3,.
-
-- /* Waking up from nap */
- li r0,PNV_THREAD_RUNNING
- stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
-
-@@ -143,13 +127,9 @@ BEGIN_FTR_SECTION
-
- /* Return SRR1 from power7_nap() */
- mfspr r3,SPRN_SRR1
-- beq cr3,2f
-- b power7_wakeup_noloss
--2: b power7_wakeup_loss
--
-- /* Fast Sleep wakeup on PowerNV */
--8: GET_PACA(r13)
-- b power7_wakeup_tb_loss
-+ blt cr3,2f
-+ b power7_wakeup_loss
-+2: b power7_wakeup_noloss
-
- 9:
- END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
-index 705c867..d5def06 100644
---- a/arch/powerpc/kernel/idle_power7.S
-+++ b/arch/powerpc/kernel/idle_power7.S
-@@ -276,6 +276,39 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
- 20: nop;
-
-
+- blr /* Return back to System Reset vector from where
+- power7_restore_hyp_resource was invoked */
+-
+-
+-_GLOBAL(power7_wakeup_tb_loss)
+- ld r2,PACATOC(r13);
+- ld r1,PACAR1(r13)
+- /*
+- * Before entering any idle state, the NVGPRs are saved in the stack
+- * and they are restored before switching to the process context. Hence
+- * until they are restored, they are free to be used.
+- *
+- * Save SRR1 and LR in NVGPRs as they might be clobbered in
+- * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required
+- * to determine the wakeup reason if we branch to kvm_start_guest. LR
+- * is required to return back to reset vector after hypervisor state
+- * restore is complete.
+- */
+- mflr r17
+- mfspr r16,SPRN_SRR1
+-BEGIN_FTR_SECTION
+- CHECK_HMI_INTERRUPT
+-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+-
+- lbz r7,PACA_THREAD_MASK(r13)
+- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+-lwarx_loop2:
+- lwarx r15,0,r14
+- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+- /*
+- * Lock bit is set in one of the 2 cases-
+- * a. In the sleep/winkle enter path, the last thread is executing
+- * fastsleep workaround code.
+- * b. In the wake up path, another thread is executing fastsleep
+- * workaround undo code or resyncing timebase or restoring context
+- * In either case loop until the lock bit is cleared.
+- */
+- bnel core_idle_lock_held
+-
+- cmpwi cr2,r15,0
+- lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
+- and r4,r4,r15
+- cmpwi cr1,r4,0 /* Check if first in subcore */
+-
+- /*
+- * At this stage
+- * cr1 - 0b0100 if first thread to wakeup in subcore
+- * cr2 - 0b0100 if first thread to wakeup in core
+- * cr3- 0b0010 if waking up from sleep or winkle
+- * cr4 - 0b0100 if waking up from winkle
+- */
+-
+- or r15,r15,r7 /* Set thread bit */
+-
+- beq cr1,first_thread_in_subcore
+-
+- /* Not first thread in subcore to wake up */
+- stwcx. r15,0,r14
+- bne- lwarx_loop2
+- isync
+- b common_exit
+-
+-first_thread_in_subcore:
+- /* First thread in subcore to wakeup */
+- ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+- stwcx. r15,0,r14
+- bne- lwarx_loop2
+- isync
+-
+- /*
+- * If waking up from sleep, subcore state is not lost. Hence
+- * skip subcore state restore
+- */
+- bne cr4,subcore_state_restored
+-
+- /* Restore per-subcore state */
+- ld r4,_SDR1(r1)
+- mtspr SPRN_SDR1,r4
+- ld r4,_RPR(r1)
+- mtspr SPRN_RPR,r4
+- ld r4,_AMOR(r1)
+- mtspr SPRN_AMOR,r4
+-
+-subcore_state_restored:
+- /*
+- * Check if the thread is also the first thread in the core. If not,
+- * skip to clear_lock.
+- */
+- bne cr2,clear_lock
+-
+-first_thread_in_core:
+-
+- /*
+- * First thread in the core waking up from fastsleep. It needs to
+- * call the fastsleep workaround code if the platform requires it.
+- * Call it unconditionally here. The below branch instruction will
+- * be patched out when the idle states are discovered if platform
+- * does not require workaround.
+- */
+-.global pnv_fastsleep_workaround_at_exit
+-pnv_fastsleep_workaround_at_exit:
+- b fastsleep_workaround_at_exit
+-
+-timebase_resync:
+- /* Do timebase resync if we are waking up from sleep. Use cr3 value
+- * set in exceptions-64s.S */
+- ble cr3,clear_lock
+- /* Time base re-sync */
+- li r0,OPAL_RESYNC_TIMEBASE
+- bl opal_call_realmode;
+- /* TODO: Check r3 for failure */
+-
+- /*
+- * If waking up from sleep, per core state is not lost, skip to
+- * clear_lock.
+- */
+- bne cr4,clear_lock
+-
+- /* Restore per core state */
+- ld r4,_TSCR(r1)
+- mtspr SPRN_TSCR,r4
+- ld r4,_WORC(r1)
+- mtspr SPRN_WORC,r4
+-
+-clear_lock:
+- andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+- lwsync
+- stw r15,0(r14)
+-
+-common_exit:
+- /*
+- * Common to all threads.
+- *
+- * If waking up from sleep, hypervisor state is not lost. Hence
+- * skip hypervisor state restore.
+- */
+- bne cr4,hypervisor_state_restored
+-
+- /* Waking up from winkle */
+-
+- /* Restore per thread state */
+- bl __restore_cpu_power8
+-
+- /* Restore SLB from PACA */
+- ld r8,PACA_SLBSHADOWPTR(r13)
+-
+- .rept SLB_NUM_BOLTED
+- li r3, SLBSHADOW_SAVEAREA
+- LDX_BE r5, r8, r3
+- addi r3, r3, 8
+- LDX_BE r6, r8, r3
+- andis. r7,r5,SLB_ESID_V@h
+- beq 1f
+- slbmte r6,r5
+-1: addi r8,r8,16
+- .endr
+-
+- ld r4,_SPURR(r1)
+- mtspr SPRN_SPURR,r4
+- ld r4,_PURR(r1)
+- mtspr SPRN_PURR,r4
+- ld r4,_DSCR(r1)
+- mtspr SPRN_DSCR,r4
+- ld r4,_WORT(r1)
+- mtspr SPRN_WORT,r4
+-
+-hypervisor_state_restored:
+-
+- mtspr SPRN_SRR1,r16
+- mtlr r17
+- blr /* Return back to System Reset vector from where
+- power7_restore_hyp_resource was invoked */
+-
+-fastsleep_workaround_at_exit:
+- li r3,1
+- li r4,0
+- li r0,OPAL_CONFIG_CPU_IDLE_STATE
+- bl opal_call_realmode
+- b timebase_resync
+-
+-/*
+- * R3 here contains the value that will be returned to the caller
+- * of power7_nap.
+- */
+-_GLOBAL(power7_wakeup_loss)
+- ld r1,PACAR1(r13)
+-BEGIN_FTR_SECTION
+- CHECK_HMI_INTERRUPT
+-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+- REST_NVGPRS(r1)
+- REST_GPR(2, r1)
+- ld r6,_CCR(r1)
+- ld r4,_MSR(r1)
+- ld r5,_NIP(r1)
+- addi r1,r1,INT_FRAME_SIZE
+- mtcr r6
+- mtspr SPRN_SRR1,r4
+- mtspr SPRN_SRR0,r5
+- rfid
+-
+-/*
+- * R3 here contains the value that will be returned to the caller
+- * of power7_nap.
+- */
+-_GLOBAL(power7_wakeup_noloss)
+- lbz r0,PACA_NAPSTATELOST(r13)
+- cmpwi r0,0
+- bne power7_wakeup_loss
+-BEGIN_FTR_SECTION
+- CHECK_HMI_INTERRUPT
+-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+- ld r1,PACAR1(r13)
+- ld r6,_CCR(r1)
+- ld r4,_MSR(r1)
+- ld r5,_NIP(r1)
+- addi r1,r1,INT_FRAME_SIZE
+- mtcr r6
+- mtspr SPRN_SRR1,r4
+- mtspr SPRN_SRR0,r5
+- rfid
+diff --git a/arch/powerpc/kernel/idle_power_common.S b/arch/powerpc/kernel/idle_power_common.S
+new file mode 100644
+index 0000000..db59613
+--- /dev/null
++++ b/arch/powerpc/kernel/idle_power_common.S
+@@ -0,0 +1,527 @@
++/*
++ * This file contains the power_save function for Power7 CPUs.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <linux/threads.h>
++#include <asm/processor.h>
++#include <asm/page.h>
++#include <asm/cputable.h>
++#include <asm/thread_info.h>
++#include <asm/ppc_asm.h>
++#include <asm/asm-offsets.h>
++#include <asm/ppc-opcode.h>
++#include <asm/hw_irq.h>
++#include <asm/kvm_book3s_asm.h>
++#include <asm/opal.h>
++#include <asm/cpuidle.h>
++#include <asm/book3s/64/mmu-hash.h>
++
++#undef DEBUG
++
++/*
++ * Use unused space in the interrupt stack to save and restore
++ * registers for winkle support.
++ */
++#define _SDR1 GPR3
++#define _RPR GPR4
++#define _SPURR GPR5
++#define _PURR GPR6
++#define _TSCR GPR7
++#define _DSCR GPR8
++#define _AMOR GPR9
++#define _WORT GPR10
++#define _WORC GPR11
++
++/* Idle state entry routines */
++
++#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
++ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
++ std r0,0(r1); \
++ ptesync; \
++ ld r0,0(r1); \
++1: cmp cr0,r0,r0; \
++ bne 1b; \
++ IDLE_INST; \
++ b .
++
++ .text
++
++/*
++ * Used by threads when the lock bit of core_idle_state is set.
++ * Threads will spin in HMT_LOW until the lock bit is cleared.
++ * r14 - pointer to core_idle_state
++ * r15 - used to load contents of core_idle_state
++ */
++
++core_idle_lock_held:
++ HMT_LOW
++3: lwz r15,0(r14)
++ andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT
++ bne 3b
++ HMT_MEDIUM
++ lwarx r15,0,r14
++ blr
++
++/*
++ * Pass requested state in r3:
++ * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
++ *
++ * To check IRQ_HAPPENED in r4
++ * 0 - don't check
++ * 1 - check
++ */
++_GLOBAL(power7_powersave_common)
++ /* Use r3 to pass state nap/sleep/winkle */
++ /* NAP is a state loss, we create a regs frame on the
++ * stack, fill it up with the state we care about and
++ * stick a pointer to it in PACAR1. We really only
++ * need to save PC, some CR bits and the NV GPRs,
++ * but for now an interrupt frame will do.
++ */
++ mflr r0
++ std r0,16(r1)
++ stdu r1,-INT_FRAME_SIZE(r1)
++ std r0,_LINK(r1)
++ std r0,_NIP(r1)
++
++ /* Hard disable interrupts */
++ mfmsr r9
++ rldicl r9,r9,48,1
++ rotldi r9,r9,16
++ mtmsrd r9,1 /* hard-disable interrupts */
++
++ /* Check if something happened while soft-disabled */
++ lbz r0,PACAIRQHAPPENED(r13)
++ andi. r0,r0,~PACA_IRQ_HARD_DIS@l
++ beq 1f
++ cmpwi cr0,r4,0
++ beq 1f
++ addi r1,r1,INT_FRAME_SIZE
++ ld r0,16(r1)
++ li r3,0 /* Return 0 (no nap) */
++ mtlr r0
++ blr
++
++1: /* We mark irqs hard disabled as this is the state we'll
++ * be in when returning and we need to tell arch_local_irq_restore()
++ * about it
++ */
++ li r0,PACA_IRQ_HARD_DIS
++ stb r0,PACAIRQHAPPENED(r13)
++
++ /* We haven't lost state ... yet */
++ li r0,0
++ stb r0,PACA_NAPSTATELOST(r13)
++
++ /* Continue saving state */
++ SAVE_GPR(2, r1)
++ SAVE_NVGPRS(r1)
++ mfcr r4
++ std r4,_CCR(r1)
++ std r9,_MSR(r1)
++ std r1,PACAR1(r13)
++
++ /*
++ * Go to real mode to do the nap, as required by the architecture.
++ * Also, we need to be in real mode before setting hwthread_state,
++ * because as soon as we do that, another thread can switch
++ * the MMU context to the guest.
++ */
++ LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
++ li r6, MSR_RI
++ andc r6, r9, r6
++ LOAD_REG_ADDR(r7, power7_enter_nap_mode)
++ mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
++ mtspr SPRN_SRR0, r7
++ mtspr SPRN_SRR1, r5
++ rfid
++
++ .globl power7_enter_nap_mode
++power7_enter_nap_mode:
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++ /* Tell KVM we're napping */
++ li r4,KVM_HWTHREAD_IN_NAP
++ stb r4,HSTATE_HWTHREAD_STATE(r13)
++#endif
++ stb r3,PACA_THREAD_IDLE_STATE(r13)
++ cmpwi cr3,r3,PNV_THREAD_SLEEP
++ bge cr3,2f
++ IDLE_STATE_ENTER_SEQ(PPC_NAP)
++ /* No return */
++2:
++ /* Sleep or winkle */
++ lbz r7,PACA_THREAD_MASK(r13)
++ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
++lwarx_loop1:
++ lwarx r15,0,r14
++
++ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
++ bnel core_idle_lock_held
++
++ andc r15,r15,r7 /* Clear thread bit */
++
++ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
++
++/*
++ * If cr0 = 0, then current thread is the last thread of the core entering
++ * sleep. Last thread needs to execute the hardware bug workaround code if
++ * required by the platform.
++ * Make the workaround call unconditionally here. The below branch call is
++ * patched out when the idle states are discovered if the platform does not
++ * require it.
++ */
++.global pnv_fastsleep_workaround_at_entry
++pnv_fastsleep_workaround_at_entry:
++ beq fastsleep_workaround_at_entry
++
++ stwcx. r15,0,r14
++ bne- lwarx_loop1
++ isync
++
++common_enter: /* common code for all the threads entering sleep or winkle */
++ bgt cr3,enter_winkle
++ IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
++
++fastsleep_workaround_at_entry:
++ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
++ stwcx. r15,0,r14
++ bne- lwarx_loop1
++ isync
++
++ /* Fast sleep workaround */
++ li r3,1
++ li r4,1
++ li r0,OPAL_CONFIG_CPU_IDLE_STATE
++ bl opal_call_realmode
++
++ /* Clear Lock bit */
++ li r0,0
++ lwsync
++ stw r0,0(r14)
++ b common_enter
++
++enter_winkle:
++ /*
++ * Note all register i.e per-core, per-subcore or per-thread is saved
++ * here since any thread in the core might wake up first
++ */
++ mfspr r3,SPRN_SDR1
++ std r3,_SDR1(r1)
++ mfspr r3,SPRN_RPR
++ std r3,_RPR(r1)
++ mfspr r3,SPRN_SPURR
++ std r3,_SPURR(r1)
++ mfspr r3,SPRN_PURR
++ std r3,_PURR(r1)
++ mfspr r3,SPRN_TSCR
++ std r3,_TSCR(r1)
++ mfspr r3,SPRN_DSCR
++ std r3,_DSCR(r1)
++ mfspr r3,SPRN_AMOR
++ std r3,_AMOR(r1)
++ mfspr r3,SPRN_WORT
++ std r3,_WORT(r1)
++ mfspr r3,SPRN_WORC
++ std r3,_WORC(r1)
++ IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
++
++_GLOBAL(power7_idle)
++ /* Now check if user or arch enabled NAP mode */
++ LOAD_REG_ADDRBASE(r3,powersave_nap)
++ lwz r4,ADDROFF(powersave_nap)(r3)
++ cmpwi 0,r4,0
++ beqlr
++ li r3, 1
++ /* fall through */
++
++_GLOBAL(power7_nap)
++ mr r4,r3
++ li r3,PNV_THREAD_NAP
++ b power7_powersave_common
++ /* No return */
++
++_GLOBAL(power7_sleep)
++ li r3,PNV_THREAD_SLEEP
++ li r4,1
++ b power7_powersave_common
++ /* No return */
++
++_GLOBAL(power7_winkle)
++ li r3,3
++ li r4,1
++ b power7_powersave_common
++ /* No return */
++
++#define CHECK_HMI_INTERRUPT \
++ mfspr r0,SPRN_SRR1; \
++BEGIN_FTR_SECTION_NESTED(66); \
++ rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \
++FTR_SECTION_ELSE_NESTED(66); \
++ rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \
++ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
++ cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
++ bne 20f; \
++ /* Invoke opal call to handle hmi */ \
++ ld r2,PACATOC(r13); \
++ ld r1,PACAR1(r13); \
++ std r3,ORIG_GPR3(r1); /* Save original r3 */ \
++ li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
++ bl opal_call_realmode; \
++ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
++20: nop;
++
++
+/*
+ * Called from reset vector. Check whether we have woken up with
+ * hypervisor state loss. If yes, restore hypervisor state and return
@@ -151,63 +879,221 @@
+ power7_restore_hyp_resource was invoked */
+
+
- _GLOBAL(power7_wakeup_tb_loss)
- ld r2,PACATOC(r13);
- ld r1,PACAR1(r13)
-@@ -284,11 +317,13 @@ _GLOBAL(power7_wakeup_tb_loss)
- * and they are restored before switching to the process context. Hence
- * until they are restored, they are free to be used.
- *
-- * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
-- * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
-- * wakeup reason if we branch to kvm_start_guest.
++_GLOBAL(power7_wakeup_tb_loss)
++ ld r2,PACATOC(r13);
++ ld r1,PACAR1(r13)
++ /*
++ * Before entering any idle state, the NVGPRs are saved in the stack
++ * and they are restored before switching to the process context. Hence
++ * until they are restored, they are free to be used.
++ *
+ * Save SRR1 and LR in NVGPRs as they might be clobbered in
+ * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required
+ * to determine the wakeup reason if we branch to kvm_start_guest. LR
+ * is required to return back to reset vector after hypervisor state
+ * restore is complete.
- */
--
++ */
+ mflr r17
- mfspr r16,SPRN_SRR1
- BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-@@ -438,33 +473,10 @@ common_exit:
-
- hypervisor_state_restored:
-
-- li r5,PNV_THREAD_RUNNING
-- stb r5,PACA_THREAD_IDLE_STATE(r13)
--
- mtspr SPRN_SRR1,r16
--#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-- li r0,KVM_HWTHREAD_IN_KERNEL
-- stb r0,HSTATE_HWTHREAD_STATE(r13)
-- /* Order setting hwthread_state vs. testing hwthread_req */
-- sync
-- lbz r0,HSTATE_HWTHREAD_REQ(r13)
-- cmpwi r0,0
-- beq 6f
-- b kvm_start_guest
--6:
--#endif
--
-- REST_NVGPRS(r1)
-- REST_GPR(2, r1)
-- ld r3,_CCR(r1)
-- ld r4,_MSR(r1)
-- ld r5,_NIP(r1)
-- addi r1,r1,INT_FRAME_SIZE
-- mtcr r3
-- mfspr r3,SPRN_SRR1 /* Return SRR1 */
-- mtspr SPRN_SRR1,r4
-- mtspr SPRN_SRR0,r5
-- rfid
++ mfspr r16,SPRN_SRR1
++BEGIN_FTR_SECTION
++ CHECK_HMI_INTERRUPT
++END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
++
++ lbz r7,PACA_THREAD_MASK(r13)
++ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
++lwarx_loop2:
++ lwarx r15,0,r14
++ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
++ /*
++ * Lock bit is set in one of the 2 cases-
++ * a. In the sleep/winkle enter path, the last thread is executing
++ * fastsleep workaround code.
++ * b. In the wake up path, another thread is executing fastsleep
++ * workaround undo code or resyncing timebase or restoring context
++ * In either case loop until the lock bit is cleared.
++ */
++ bnel core_idle_lock_held
++
++ cmpwi cr2,r15,0
++ lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
++ and r4,r4,r15
++ cmpwi cr1,r4,0 /* Check if first in subcore */
++
++ /*
++ * At this stage
++ * cr1 - 0b0100 if first thread to wakeup in subcore
++ * cr2 - 0b0100 if first thread to wakeup in core
++ * cr3- 0b0010 if waking up from sleep or winkle
++ * cr4 - 0b0100 if waking up from winkle
++ */
++
++ or r15,r15,r7 /* Set thread bit */
++
++ beq cr1,first_thread_in_subcore
++
++ /* Not first thread in subcore to wake up */
++ stwcx. r15,0,r14
++ bne- lwarx_loop2
++ isync
++ b common_exit
++
++first_thread_in_subcore:
++ /* First thread in subcore to wakeup */
++ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
++ stwcx. r15,0,r14
++ bne- lwarx_loop2
++ isync
++
++ /*
++ * If waking up from sleep, subcore state is not lost. Hence
++ * skip subcore state restore
++ */
++ bne cr4,subcore_state_restored
++
++ /* Restore per-subcore state */
++ ld r4,_SDR1(r1)
++ mtspr SPRN_SDR1,r4
++ ld r4,_RPR(r1)
++ mtspr SPRN_RPR,r4
++ ld r4,_AMOR(r1)
++ mtspr SPRN_AMOR,r4
++
++subcore_state_restored:
++ /*
++ * Check if the thread is also the first thread in the core. If not,
++ * skip to clear_lock.
++ */
++ bne cr2,clear_lock
++
++first_thread_in_core:
++
++ /*
++ * First thread in the core waking up from fastsleep. It needs to
++ * call the fastsleep workaround code if the platform requires it.
++ * Call it unconditionally here. The below branch instruction will
++ * be patched out when the idle states are discovered if platform
++ * does not require workaround.
++ */
++.global pnv_fastsleep_workaround_at_exit
++pnv_fastsleep_workaround_at_exit:
++ b fastsleep_workaround_at_exit
++
++timebase_resync:
++ /* Do timebase resync if we are waking up from sleep. Use cr3 value
++ * set in exceptions-64s.S */
++ ble cr3,clear_lock
++ /* Time base re-sync */
++ li r0,OPAL_RESYNC_TIMEBASE
++ bl opal_call_realmode;
++ /* TODO: Check r3 for failure */
++
++ /*
++ * If waking up from sleep, per core state is not lost, skip to
++ * clear_lock.
++ */
++ bne cr4,clear_lock
++
++ /* Restore per core state */
++ ld r4,_TSCR(r1)
++ mtspr SPRN_TSCR,r4
++ ld r4,_WORC(r1)
++ mtspr SPRN_WORC,r4
++
++clear_lock:
++ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
++ lwsync
++ stw r15,0(r14)
++
++common_exit:
++ /*
++ * Common to all threads.
++ *
++ * If waking up from sleep, hypervisor state is not lost. Hence
++ * skip hypervisor state restore.
++ */
++ bne cr4,hypervisor_state_restored
++
++ /* Waking up from winkle */
++
++ /* Restore per thread state */
++ bl __restore_cpu_power8
++
++ /* Restore SLB from PACA */
++ ld r8,PACA_SLBSHADOWPTR(r13)
++
++ .rept SLB_NUM_BOLTED
++ li r3, SLBSHADOW_SAVEAREA
++ LDX_BE r5, r8, r3
++ addi r3, r3, 8
++ LDX_BE r6, r8, r3
++ andis. r7,r5,SLB_ESID_V@h
++ beq 1f
++ slbmte r6,r5
++1: addi r8,r8,16
++ .endr
++
++ ld r4,_SPURR(r1)
++ mtspr SPRN_SPURR,r4
++ ld r4,_PURR(r1)
++ mtspr SPRN_PURR,r4
++ ld r4,_DSCR(r1)
++ mtspr SPRN_DSCR,r4
++ ld r4,_WORT(r1)
++ mtspr SPRN_WORT,r4
++
++hypervisor_state_restored:
++
++ mtspr SPRN_SRR1,r16
+ mtlr r17
+ blr /* Return back to System Reset vector from where
+ power7_restore_hyp_resource was invoked */
-
- fastsleep_workaround_at_exit:
- li r3,1
++
++fastsleep_workaround_at_exit:
++ li r3,1
++ li r4,0
++ li r0,OPAL_CONFIG_CPU_IDLE_STATE
++ bl opal_call_realmode
++ b timebase_resync
++
++/*
++ * R3 here contains the value that will be returned to the caller
++ * of power7_nap.
++ */
++_GLOBAL(power7_wakeup_loss)
++ ld r1,PACAR1(r13)
++BEGIN_FTR_SECTION
++ CHECK_HMI_INTERRUPT
++END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
++ REST_NVGPRS(r1)
++ REST_GPR(2, r1)
++ ld r6,_CCR(r1)
++ ld r4,_MSR(r1)
++ ld r5,_NIP(r1)
++ addi r1,r1,INT_FRAME_SIZE
++ mtcr r6
++ mtspr SPRN_SRR1,r4
++ mtspr SPRN_SRR0,r5
++ rfid
++
++/*
++ * R3 here contains the value that will be returned to the caller
++ * of power7_nap.
++ */
++_GLOBAL(power7_wakeup_noloss)
++ lbz r0,PACA_NAPSTATELOST(r13)
++ cmpwi r0,0
++ bne power7_wakeup_loss
++BEGIN_FTR_SECTION
++ CHECK_HMI_INTERRUPT
++END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
++ ld r1,PACAR1(r13)
++ ld r6,_CCR(r1)
++ ld r4,_MSR(r1)
++ ld r5,_NIP(r1)
++ addi r1,r1,INT_FRAME_SIZE
++ mtcr r6
++ mtspr SPRN_SRR1,r4
++ mtspr SPRN_SRR0,r5
++ rfid
--
2.4.11