[PATCH 16/18] KVM: x86: Take mem attributes into account when faulting memory
From: Nicolas Saenz Julienne <hidden>
Date: 2024-06-09 15:58:49
Also in:
kvm, linux-arch, linux-doc, linux-hyperv, lkml
Subsystem:
kernel virtual machine (kvm), kernel virtual machine for x86 (kvm/x86), the rest, x86 architecture (32-bit and 64-bit) · Maintainers:
Paolo Bonzini, Sean Christopherson, Linus Torvalds, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen
Take into account access restrictions memory attributes when faulting guest memory. Prohibited memory accesses will cause an user-space fault exit. Additionally, bypass a warning in the !tdp case. Access restrictions in guest page tables might not necessarily match the host pte's when memory attributes are in use. Signed-off-by: Nicolas Saenz Julienne <redacted> --- arch/x86/kvm/mmu/mmu.c | 64 ++++++++++++++++++++++++++++------ arch/x86/kvm/mmu/mmutrace.h | 29 +++++++++++++++ arch/x86/kvm/mmu/paging_tmpl.h | 2 +- include/linux/kvm_host.h | 4 +++ 4 files changed, 87 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 91edd873dcdbc..dfe50c9c31f7b 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c@@ -754,7 +754,8 @@ static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index) return sp->role.access; } -static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, +static void kvm_mmu_page_set_translation(struct kvm *kvm, + struct kvm_mmu_page *sp, int index, gfn_t gfn, unsigned int access) { if (sp_has_gptes(sp)) {
@@ -762,10 +763,17 @@ static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, return; } - WARN_ONCE(access != kvm_mmu_page_get_access(sp, index), - "access mismatch under %s page %llx (expected %u, got %u)\n", - sp->role.passthrough ? "passthrough" : "direct", - sp->gfn, kvm_mmu_page_get_access(sp, index), access); + /* + * Userspace might have introduced memory attributes for this gfn, + * breaking the assumption that the spte's access restrictions match + * the guest's. Userspace is also responsible from taking care of + * faults caused by these 'artificial' access restrictions. + */ + WARN_ONCE(access != kvm_mmu_page_get_access(sp, index) && + !kvm_get_memory_attributes(kvm, gfn), + "access mismatch under %s page %llx (expected %u, got %u)\n", + sp->role.passthrough ? "passthrough" : "direct", sp->gfn, + kvm_mmu_page_get_access(sp, index), access); WARN_ONCE(gfn != kvm_mmu_page_get_gfn(sp, index), "gfn mismatch under %s page %llx (expected %llx, got %llx)\n",
@@ -773,12 +781,12 @@ static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, sp->gfn, kvm_mmu_page_get_gfn(sp, index), gfn); } -static void kvm_mmu_page_set_access(struct kvm_mmu_page *sp, int index, - unsigned int access) +static void kvm_mmu_page_set_access(struct kvm *kvm, struct kvm_mmu_page *sp, + int index, unsigned int access) { gfn_t gfn = kvm_mmu_page_get_gfn(sp, index); - kvm_mmu_page_set_translation(sp, index, gfn, access); + kvm_mmu_page_set_translation(kvm, sp, index, gfn, access); } /*
@@ -1607,7 +1615,7 @@ static void __rmap_add(struct kvm *kvm, int rmap_count; sp = sptep_to_sp(spte); - kvm_mmu_page_set_translation(sp, spte_index(spte), gfn, access); + kvm_mmu_page_set_translation(kvm, sp, spte_index(spte), gfn, access); kvm_update_page_stats(kvm, sp->role.level, 1); rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
@@ -2928,7 +2936,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, rmap_add(vcpu, slot, sptep, gfn, pte_access); } else { /* Already rmapped but the pte_access bits may have changed. */ - kvm_mmu_page_set_access(sp, spte_index(sptep), pte_access); + kvm_mmu_page_set_access(vcpu->kvm, sp, spte_index(sptep), + pte_access); } return ret;
@@ -4320,6 +4329,38 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, return RET_PF_CONTINUE; } +static int kvm_mem_attributes_faultin_access_prots(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + bool may_read, may_write, may_exec; + unsigned long attrs; + + attrs = kvm_get_memory_attributes(vcpu->kvm, fault->gfn); + if (!attrs) + return RET_PF_CONTINUE; + + if (!kvm_mem_attributes_valid(vcpu->kvm, attrs)) { + kvm_err("Invalid mem attributes 0x%lx found for address 0x%016llx\n", + attrs, fault->addr); + return -EFAULT; + } + + trace_kvm_mem_attributes_faultin_access_prots(vcpu, fault, attrs); + + may_read = kvm_mem_attributes_may_read(attrs); + may_write = kvm_mem_attributes_may_write(attrs); + may_exec = kvm_mem_attributes_may_exec(attrs); + + if ((fault->user && !may_read) || (fault->write && !may_write) || + (fault->exec && !may_exec)) + return -EFAULT; + + fault->map_writable = may_write; + fault->map_executable = may_exec; + + return RET_PF_CONTINUE; +} + static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { bool async;
@@ -4375,7 +4416,8 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * Now that we have a snapshot of mmu_invalidate_seq we can check for a * private vs. shared mismatch. */ - if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { + if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn) || + kvm_mem_attributes_faultin_access_prots(vcpu, fault)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; }
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index 195d98bc8de85..ddbdd7396e9fa 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h@@ -440,6 +440,35 @@ TRACE_EVENT( __entry->gfn, __entry->spte, __entry->level, __entry->errno) ); +TRACE_EVENT(kvm_mem_attributes_faultin_access_prots, + TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, + u64 mem_attrs), + TP_ARGS(vcpu, fault, mem_attrs), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(unsigned long, guest_rip) + __field(u64, fault_address) + __field(bool, write) + __field(bool, exec) + __field(u64, mem_attrs) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->guest_rip = kvm_rip_read(vcpu); + __entry->fault_address = fault->addr; + __entry->write = fault->write; + __entry->exec = fault->exec; + __entry->mem_attrs = mem_attrs; + ), + + TP_printk("vcpu %d rip 0x%lx gfn 0x%016llx access %s mem_attrs 0x%llx", + __entry->vcpu_id, __entry->guest_rip, __entry->fault_address, + __entry->exec ? "X" : (__entry->write ? "W" : "R"), + __entry->mem_attrs) +); + #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index d3dbcf382ed2d..166f5f0e885e0 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h@@ -954,7 +954,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int return 0; /* Update the shadowed access bits in case they changed. */ - kvm_mmu_page_set_access(sp, i, pte_access); + kvm_mmu_page_set_access(vcpu->kvm, sp, i, pte_access); sptep = &sp->spt[i]; spte = *sptep;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 85378345e8e77..9c26161d13dea 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h@@ -2463,6 +2463,10 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { return false; } +static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) +{ + return 0; +} #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ #ifdef CONFIG_KVM_PRIVATE_MEM
--
2.40.1