--- v5
+++ v4
@@ -1,57 +1,378 @@
-While resolving the RMP page fault, we may run into cases where the page
-level between the RMP entry and TDP does not match and the 2M RMP entry
-must be split into 4K RMP entries, or a 2M TDP page needs to be broken
-into multiple 4K pages.
-
-To keep the RMP and TDP page levels in sync, we will zap the gfn range
-after splitting the pages in the RMP entry. The zap should force the
-TDP to get rebuilt with the new page level.
-
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+Add support for the SEV-SNP AP Creation NAE event. This allows SEV-SNP
+guests to create and start APs on their own.
+
+A new event, KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, is created and used
+so as to avoid updating the VMSA pointer while the vCPU is running.
+
+For CREATE:
+ The guest supplies the GPA of the VMSA to be used for the vCPU with the
+ specified APIC ID. The GPA is saved in the svm struct of the target
+ vCPU, the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event is added to the
+ vCPU and then the vCPU is kicked.
+
+For CREATE_ON_INIT:
+ The guest supplies the GPA of the VMSA to be used for the vCPU with the
+ specified APIC ID the next time an INIT is performed. The GPA is saved
+ in the svm struct of the target vCPU.
+
+For DESTROY:
+ The guest indicates it wishes to stop the vCPU. The GPA is cleared from
+ the svm struct, the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event is added
+ to the vCPU and then the vCPU is kicked.
+
+
+The KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event handler will be invoked as
+a result of the event or as a result of an INIT. The handler sets the vCPU
+to the KVM_MP_STATE_UNINITIALIZED state, so that any errors will leave the
+vCPU as not runnable. Any previous VMSA pages that were installed as
+part of an SEV-SNP AP Creation NAE event are un-pinned. If a new VMSA is
+to be installed, the VMSA guest page is pinned and set as the VMSA in the
+vCPU VMCB and the vCPU state is set to KVM_MP_STATE_RUNNABLE. If a new
+VMSA is not to be installed, the VMSA is cleared in the vCPU VMCB and the
+vCPU state is left as KVM_MP_STATE_UNINITIALIZED to prevent it from being
+run.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
- arch/x86/include/asm/kvm_host.h | 2 ++
- arch/x86/kvm/mmu.h | 2 --
- arch/x86/kvm/mmu/mmu.c | 1 +
- 3 files changed, 3 insertions(+), 2 deletions(-)
+ arch/x86/include/asm/kvm_host.h | 3 +
+ arch/x86/include/asm/svm.h | 3 +
+ arch/x86/kvm/svm/sev.c | 133 ++++++++++++++++++++++++++++++++
+ arch/x86/kvm/svm/svm.c | 7 +-
+ arch/x86/kvm/svm/svm.h | 16 +++-
+ arch/x86/kvm/x86.c | 11 ++-
+ 6 files changed, 170 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index 5ac1ff097e8c..8773c1f9e45e 100644
+index 117e2e08d7ed..881e05b3f74e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
-@@ -1561,6 +1561,8 @@ void kvm_mmu_zap_all(struct kvm *kvm);
- void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
- unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
- void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
-+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-+
-
- int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
-
-diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
-index 7c4fac53183d..f767a52f9178 100644
---- a/arch/x86/kvm/mmu.h
-+++ b/arch/x86/kvm/mmu.h
-@@ -228,8 +228,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- return -(u32)fault & errcode;
+@@ -91,6 +91,7 @@
+ #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
+ #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
++#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(31)
+
+ #define CR0_RESERVED_BITS \
+ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+@@ -1402,6 +1403,8 @@ struct kvm_x86_ops {
+
+ int (*handle_rmp_page_fault)(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
+ int level, u64 error_code);
++
++ void (*update_protected_guest_state)(struct kvm_vcpu *vcpu);
+ };
+
+ struct kvm_x86_nested_ops {
+diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
+index 5e72faa00cf2..6634a952563e 100644
+--- a/arch/x86/include/asm/svm.h
++++ b/arch/x86/include/asm/svm.h
+@@ -220,6 +220,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
+ #define SVM_SEV_FEATURES_DEBUG_SWAP BIT(5)
+ #define SVM_SEV_FEATURES_PREVENT_HOST_IBS BIT(6)
+ #define SVM_SEV_FEATURES_BTB_ISOLATION BIT(7)
++#define SVM_SEV_FEATURES_INT_INJ_MODES \
++ (SVM_SEV_FEATURES_RESTRICTED_INJECTION | \
++ SVM_SEV_FEATURES_ALTERNATE_INJECTION)
+
+ struct vmcb_seg {
+ u16 selector;
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index d8ad6dd58c87..95f5d25b4f08 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -582,6 +582,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+
+ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+ {
++ struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ struct sev_es_save_area *save = svm->vmsa;
+
+ /* Check some debug related fields before encrypting the VMSA */
+@@ -625,6 +626,12 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+ if (sev_snp_guest(svm->vcpu.kvm))
+ save->sev_features |= SVM_SEV_FEATURES_SNP_ACTIVE;
+
++ /*
++ * Save the VMSA synced SEV features. For now, they are the same for
++ * all vCPUs, so just save each time.
++ */
++ sev->sev_features = save->sev_features;
++
+ return 0;
}
--void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
--
- int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
-
- int kvm_mmu_post_init_vm(struct kvm *kvm);
-diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
-index e660d832e235..56a7da49092d 100644
---- a/arch/x86/kvm/mmu/mmu.c
-+++ b/arch/x86/kvm/mmu/mmu.c
-@@ -5748,6 +5748,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
-
- return need_tlb_flush;
+@@ -2682,6 +2689,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+ if (!ghcb_sw_scratch_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
++ case SVM_VMGEXIT_AP_CREATION:
++ if (!ghcb_rax_is_valid(ghcb))
++ goto vmgexit_err;
++ break;
+ case SVM_VMGEXIT_NMI_COMPLETE:
+ case SVM_VMGEXIT_AP_HLT_LOOP:
+ case SVM_VMGEXIT_AP_JUMP_TABLE:
+@@ -3395,6 +3406,121 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
+ return ret;
}
-+EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
-
- void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *memslot)
+
++void sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
++{
++ struct vcpu_svm *svm = to_svm(vcpu);
++ kvm_pfn_t pfn;
++
++ mutex_lock(&svm->snp_vmsa_mutex);
++
++ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
++
++ /* Clear use of the VMSA in the sev_es_init_vmcb() path */
++ svm->vmsa_pa = 0;
++
++ /* Clear use of the VMSA from the VMCB */
++ svm->vmcb->control.vmsa_pa = 0;
++
++ /* Un-pin previous VMSA */
++ if (svm->snp_vmsa_pfn) {
++ kvm_release_pfn_dirty(svm->snp_vmsa_pfn);
++ svm->snp_vmsa_pfn = 0;
++ }
++
++ if (svm->snp_vmsa_gpa) {
++ /* Validate that the GPA is page aligned */
++ if (!PAGE_ALIGNED(svm->snp_vmsa_gpa))
++ goto e_unlock;
++
++ /*
++ * The VMSA is referenced by the hypervisor physical address,
++ * so retrieve the PFN and pin it.
++ */
++ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(svm->snp_vmsa_gpa));
++ if (is_error_pfn(pfn))
++ goto e_unlock;
++
++ svm->snp_vmsa_pfn = pfn;
++
++ /* Use the new VMSA in the sev_es_init_vmcb() path */
++ svm->vmsa_pa = pfn_to_hpa(pfn);
++ svm->vmcb->control.vmsa_pa = svm->vmsa_pa;
++
++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
++ } else {
++ vcpu->arch.pv.pv_unhalted = false;
++ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
++ }
++
++e_unlock:
++ mutex_unlock(&svm->snp_vmsa_mutex);
++}
++
++static void sev_snp_ap_creation(struct vcpu_svm *svm)
++{
++ struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
++ struct kvm_vcpu *vcpu = &svm->vcpu;
++ struct kvm_vcpu *target_vcpu;
++ struct vcpu_svm *target_svm;
++ unsigned int request;
++ unsigned int apic_id;
++ bool kick;
++
++ request = lower_32_bits(svm->vmcb->control.exit_info_1);
++ apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
++
++ /* Validate the APIC ID */
++ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id);
++ if (!target_vcpu)
++ return;
++
++ target_svm = to_svm(target_vcpu);
++
++ kick = true;
++
++ mutex_lock(&target_svm->snp_vmsa_mutex);
++
++ target_svm->snp_vmsa_gpa = 0;
++ target_svm->snp_vmsa_update_on_init = false;
++
++ /* Interrupt injection mode shouldn't change for AP creation */
++ if (request < SVM_VMGEXIT_AP_DESTROY) {
++ u64 sev_features;
++
++ sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
++ sev_features ^= sev->sev_features;
++ if (sev_features & SVM_SEV_FEATURES_INT_INJ_MODES) {
++ vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#lx] from guest\n",
++ vcpu->arch.regs[VCPU_REGS_RAX]);
++ goto out;
++ }
++ }
++
++ switch (request) {
++ case SVM_VMGEXIT_AP_CREATE_ON_INIT:
++ kick = false;
++ target_svm->snp_vmsa_update_on_init = true;
++ fallthrough;
++ case SVM_VMGEXIT_AP_CREATE:
++ target_svm->snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
++ break;
++ case SVM_VMGEXIT_AP_DESTROY:
++ break;
++ default:
++ vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n",
++ request);
++ break;
++ }
++
++out:
++ mutex_unlock(&target_svm->snp_vmsa_mutex);
++
++ if (kick) {
++ kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
++ kvm_vcpu_kick(target_vcpu);
++ }
++}
++
+ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+@@ -3523,6 +3649,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ ret = 1;
+ break;
+ }
++ case SVM_VMGEXIT_AP_CREATION:
++ sev_snp_ap_creation(svm);
++
++ ret = 1;
++ break;
+ case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+ vcpu_unimpl(vcpu,
+ "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+@@ -3597,6 +3728,8 @@ void sev_es_create_vcpu(struct vcpu_svm *svm)
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+ GHCB_VERSION_MIN,
+ sev_enc_bit));
++
++ mutex_init(&svm->snp_vmsa_mutex);
+ }
+
+ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 74bc635c9608..078a569c85a8 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -1304,7 +1304,10 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ svm->spec_ctrl = 0;
+ svm->virt_spec_ctrl = 0;
+
+- if (!init_event) {
++ if (init_event && svm->snp_vmsa_update_on_init) {
++ svm->snp_vmsa_update_on_init = false;
++ sev_snp_update_protected_guest_state(vcpu);
++ } else {
+ vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(vcpu))
+@@ -4588,6 +4591,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+ .write_page_begin = sev_snp_write_page_begin,
+
+ .handle_rmp_page_fault = snp_handle_rmp_page_fault,
++
++ .update_protected_guest_state = sev_snp_update_protected_guest_state,
+ };
+
+ static struct kvm_x86_init_ops svm_init_ops __initdata = {
+diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
+index 285d9b97b4d2..f9d25d944f26 100644
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -60,18 +60,26 @@ struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ bool es_active; /* SEV-ES enabled guest */
+ bool snp_active; /* SEV-SNP enabled guest */
++
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
++
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
++
+ u64 ap_jump_table; /* SEV-ES AP Jump Table address */
++
+ struct kvm *enc_context_owner; /* Owner of copied encryption context */
++
+ struct misc_cg *misc_cg; /* For misc cgroup accounting */
++
+ void *snp_context; /* SNP guest context page */
+ void *snp_resp_page; /* SNP guest response page */
+ struct ratelimit_state snp_guest_msg_rs; /* Rate limit the SNP guest message */
+ void *snp_certs_data;
++
++ u64 sev_features; /* Features set at VMSA creation */
+ };
+
+ struct kvm_svm {
+@@ -192,6 +200,11 @@ struct vcpu_svm {
+ bool guest_state_loaded;
+
+ u64 ghcb_registered_gpa;
++
++ struct mutex snp_vmsa_mutex;
++ gpa_t snp_vmsa_gpa;
++ kvm_pfn_t snp_vmsa_pfn;
++ bool snp_vmsa_update_on_init; /* SEV-SNP AP Creation on INIT-SIPI */
+ };
+
+ struct svm_cpu_data {
+@@ -555,7 +568,7 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
+ #define GHCB_VERSION_MAX 2ULL
+ #define GHCB_VERSION_MIN 1ULL
+
+-#define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP
++#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
+
+ extern unsigned int max_sev_asid;
+
+@@ -584,6 +597,7 @@ int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level);
+ void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
+ int snp_handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
+ int level, u64 error_code);
++void sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu);
+
+ /* vmenter.S */
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 1398b8021982..e9fd59913bc2 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9279,6 +9279,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+
+ if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
+ static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
++
++ if (kvm_check_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) {
++ kvm_x86_ops.update_protected_guest_state(vcpu);
++ if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) {
++ r = 1;
++ goto out;
++ }
++ }
+ }
+
+ if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
+@@ -11236,7 +11244,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+
+- if (kvm_apic_has_events(vcpu))
++ if (kvm_apic_has_events(vcpu) ||
++ kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
+ return true;
+
+ if (vcpu->arch.pv.pv_unhalted)
--
2.17.1