Re: [PATCH 2/2] KVM: PPC: Book3E: Get vcpu's last instruction for emulation
From: Alexander Graf <hidden>
Date: 2013-07-09 21:45:14
Also in:
kvm
On 28.06.2013, at 11:20, Mihai Caraman wrote:
lwepx faults need to be handled by KVM, and this implies additional code
in the DO_KVM macro to identify the source of an exception originating from
host context. This requires checking the Exception Syndrome Register (ESR[EPID]) and the External PID Load Context Register (EPLC[EGS]) for DTB_MISS,
DSI and LRAT exceptions, which is too intrusive for the host.

Get rid of lwepx and acquire the last instruction in kvmppc_handle_exit() by
searching for the physical address and kmap'ing it. This fixes an infinite loop
caused by lwepx's data TLB miss handled in the host and the TODO for TLB
eviction and execute-but-not-read entries.

Signed-off-by: Mihai Caraman <redacted>
---
Resending this patch for Alex G.; he was unsubscribed from the kvm-ppc mailing list for a while.

 arch/powerpc/include/asm/mmu-book3e.h | 6 ++-
 arch/powerpc/kvm/booke.c | 6 +++
 arch/powerpc/kvm/booke.h | 2 +
 arch/powerpc/kvm/bookehv_interrupts.S | 32 ++-------------
 arch/powerpc/kvm/e500.c | 4 ++
 arch/powerpc/kvm/e500mc.c | 69 +++++++++++++++++++++++++++++++++
quoted hunk ↗ jump to hunk
6 files changed, 91 insertions(+), 28 deletions(-) =20diff --git a/arch/powerpc/include/asm/mmu-book3e.h =
b/arch/powerpc/include/asm/mmu-book3e.h
quoted hunk ↗ jump to hunk
index 99d43e0..32e470e 100644--- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h@@ -40,7 +40,10 @@=20 /* MAS registers bit definitions */ =20 -#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & =
MAS0_TLBSEL_MASK)
+#define MAS0_GET_TLBSEL(mas0) (((mas0) & MAS0_TLBSEL_MASK) >> =
MAS0_TLBSEL_SHIFT)
#define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & =
MAS0_ESEL_MASK)
quoted hunk ↗ jump to hunk
@@ -58,6 +61,7 @@#define MAS1_TSIZE_MASK 0x00000f80 #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & =
MAS1_TSIZE_MASK)
+#define MAS1_GET_TSIZE(mas1) (((mas1) & MAS1_TSIZE_MASK) >> =
MAS1_TSIZE_SHIFT)
quoted hunk ↗ jump to hunk
=20 #define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1020119..6764a8e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c@@ -836,6 +836,12 @@ int kvmppc_handle_exit(struct kvm_run *run, =
struct kvm_vcpu *vcpu,
/* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); =20 + /* + * The exception type can change at this point, such as if the =
TLB entry
quoted hunk ↗ jump to hunk
+ * for the emulated instruction has been evicted. + */ + kvmppc_prepare_for_emulation(vcpu, &exit_nr); + /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); =20diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 5fd1ba6..a0d0fea 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h@@ -90,6 +90,8 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu); =20 +void kvmppc_prepare_for_emulation(struct kvm_vcpu *vcpu, unsigned int =
*exit_nr);
quoted hunk ↗ jump to hunk
+ enum int_class { INT_CLASS_NONCRIT, INT_CLASS_CRIT,diff --git a/arch/powerpc/kvm/bookehv_interrupts.S =
b/arch/powerpc/kvm/bookehv_interrupts.S
quoted hunk ↗ jump to hunk
index 20c7a54..0538ab9 100644--- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S@@ -120,37 +120,20 @@=20 .if \flags & NEED_EMU /* - * This assumes you have external PID support. - * To support a bookehv CPU without external PID, you'll - * need to look up the TLB entry and create a temporary mapping. - * - * FIXME: we don't currently handle if the lwepx faults. =
PR-mode
- * booke doesn't handle it either. Since Linux doesn't use - * broadcast tlbivax anymore, the only way this should happen is - * if the guest maps its memory execute-but-not-read, or if we - * somehow take a TLB miss in the middle of this entry code and - * evict the relevant entry. On e500mc, all kernel lowmem is - * bolted into TLB1 large page mappings, and we don't use - * broadcast invalidates, so we should not take a TLB miss here. - * - * Later we'll need to deal with faults here. Disallowing guest - * mappings that are execute-but-not-read could be an option on - * e500mc, but not on chips with an LRAT if it is used. + * We don't use external PID support. lwepx faults would need to =
be
+ * handled by KVM and this implies aditional code in DO_KVM (for + * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] =
which
+ * is too intrusive for the host. Get last instuction in + * kvmppc_handle_exit(). */ - - mfspr r3, SPRN_EPLC /* will already have correct ELPID and =
EGS */
quoted hunk ↗ jump to hunk
PPC_STL r15, VCPU_GPR(R15)(r4) PPC_STL r16, VCPU_GPR(R16)(r4) PPC_STL r17, VCPU_GPR(R17)(r4) PPC_STL r18, VCPU_GPR(R18)(r4) PPC_STL r19, VCPU_GPR(R19)(r4) - mr r8, r3 PPC_STL r20, VCPU_GPR(R20)(r4) - rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS PPC_STL r21, VCPU_GPR(R21)(r4) - rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR PPC_STL r22, VCPU_GPR(R22)(r4) - rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID PPC_STL r23, VCPU_GPR(R23)(r4) PPC_STL r24, VCPU_GPR(R24)(r4) PPC_STL r25, VCPU_GPR(R25)(r4)@@ -160,11 +143,6 @@PPC_STL r29, VCPU_GPR(R29)(r4) PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) - mtspr SPRN_EPLC, r8 - isync - lwepx r9, 0, r5 - mtspr SPRN_EPLC, r3 - stw r9, VCPU_LAST_INST(r4) .endif =20 .if \flags & NEED_ESRdiff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ce6b73c..c82a89f 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c@@ -439,6 +439,10 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 =
id,
return r; } =20 +void kvmppc_prepare_for_emulation(struct kvm_vcpu *vcpu, unsigned int =
*exit_nr)
+{
+}
+
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int =id)
quoted hunk ↗ jump to hunk
{ struct kvmppc_vcpu_e500 *vcpu_e500;diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index c3bdc0a..3641df7 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c@@ -271,6 +271,75 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 =
id,
return r; } =20 +void kvmppc_prepare_for_emulation(struct kvm_vcpu *vcpu, unsigned int =
*exit_nr)
+{
+ gva_t geaddr;
+ hpa_t addr;
+ u64 mas7_mas3;
+ hva_t eaddr;
+ u32 mas1, mas3;
+ struct page *page;
+ unsigned int addr_space, psize_shift;
+ bool pr;
+
+ if ((*exit_nr !=3D BOOKE_INTERRUPT_DATA_STORAGE) &&
+ (*exit_nr !=3D BOOKE_INTERRUPT_DTLB_MISS) &&
+ (*exit_nr !=3D BOOKE_INTERRUPT_HV_PRIV))
+ return;
+
+ /* Search guest translation to find the real addressss */
+ geaddr =3D vcpu->arch.pc;
+ addr_space =3D (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG;
+ mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | =addr_space);
+ mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
+ isync();
+ asm volatile("tlbsx 0, %[geaddr]\n" : : [geaddr] "r" (geaddr));
+ mtspr(SPRN_MAS5, 0);
+ mtspr(SPRN_MAS8, 0);=09
+
+ mas1 =3D mfspr(SPRN_MAS1);
+ if (!(mas1 & MAS1_VALID)) {
+ /*
+ * There is no translation for the emulated instruction.
+ * Simulate an instruction TLB miss. This should force =the host
+ * or ultimately the guest to add the translation and =
then
+ * reexecute the instruction. + */ + *exit_nr =3D BOOKE_INTERRUPT_ITLB_MISS; + return; + } + + mas3 =3D mfspr(SPRN_MAS3); + pr =3D vcpu->arch.shared->msr & MSR_PR; + if ((pr && (!(mas3 & MAS3_UX))) || ((!pr) && (!(mas3 & =
MAS3_SX)))) {+ /* + * Another thread may rewrite the TLB entry in parallel, =
don't
+ * execute from the address if the execute permission is =
not set
+ */ + vcpu->arch.fault_esr =3D 0; + *exit_nr =3D BOOKE_INTERRUPT_INST_STORAGE; + return; + } + + /* Get page size */ + if (MAS0_GET_TLBSEL(mfspr(SPRN_MAS0)) =3D=3D 0) + psize_shift =3D PAGE_SHIFT; + else + psize_shift =3D MAS1_GET_TSIZE(mas1) + 10; + + mas7_mas3 =3D (((u64) mfspr(SPRN_MAS7)) << 32) | + mfspr(SPRN_MAS3); + addr =3D (mas7_mas3 & (~0ULL << psize_shift)) | + (geaddr & ((1ULL << psize_shift) - 1ULL)); + + /* Map a page and get guest's instruction */ + page =3D pfn_to_page(addr >> PAGE_SHIFT);
While looking at this I just realized that you're missing a check here. What if our IP is in some PCI BAR? Or can't we execute from those?

Alex
+ eaddr =3D (unsigned long)kmap_atomic(page); + eaddr |=3D addr & ~PAGE_MASK; + vcpu->arch.last_inst =3D *(u32 *)eaddr; + kunmap_atomic((u32 *)eaddr); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int =
id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
--=20
1.7.4.1
=20
=20
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html