Inter-revision diff: patch 40

Comparing v5 (message) to v4 (message)

--- v5
+++ v4
@@ -1,57 +1,378 @@
-While resolving the RMP page fault, we may run into cases where the page
-level of the RMP entry and the TDP do not match, so that the 2M RMP entry
-must be split into 4K RMP entries, or a 2M TDP page needs to be broken
-into multiple 4K pages.
-
-To keep the RMP and TDP page levels in sync, we zap the gfn range
-after splitting the pages in the RMP entry. The zap forces the
-TDP to be rebuilt with the new page level.
-
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+Add support for the SEV-SNP AP Creation NAE event. This allows SEV-SNP
+guests to create and start APs on their own.
+
+A new event, KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, is created and used
+so as to avoid updating the VMSA pointer while the vCPU is running.
+
+For CREATE:
+  The guest supplies the GPA of the VMSA to be used for the vCPU with the
+  specified APIC ID. The GPA is saved in the svm struct of the target
+  vCPU, the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event is added to the
+  vCPU and then the vCPU is kicked.
+
+For CREATE_ON_INIT:
+  The guest supplies the GPA of the VMSA to be used for the vCPU with the
+  specified APIC ID the next time an INIT is performed. The GPA is saved
+  in the svm struct of the target vCPU.
+
+For DESTROY:
+  The guest indicates it wishes to stop the vCPU. The GPA is cleared from
+  the svm struct, the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event is added
+  to the vCPU and then the vCPU is kicked.
+
+
+The KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event handler will be invoked as
+a result of the event or as a result of an INIT. The handler sets the vCPU
+to the KVM_MP_STATE_UNINITIALIZED state, so that any errors will leave the
+vCPU as not runnable. Any previous VMSA pages that were installed as
+part of an SEV-SNP AP Creation NAE event are un-pinned. If a new VMSA is
+to be installed, the VMSA guest page is pinned and set as the VMSA in the
+vCPU VMCB and the vCPU state is set to KVM_MP_STATE_RUNNABLE. If a new
+VMSA is not to be installed, the VMSA is cleared in the vCPU VMCB and the
+vCPU state is left as KVM_MP_STATE_UNINITIALIZED to prevent it from being
+run.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
 Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
 ---
- arch/x86/include/asm/kvm_host.h | 2 ++
- arch/x86/kvm/mmu.h              | 2 --
- arch/x86/kvm/mmu/mmu.c          | 1 +
- 3 files changed, 3 insertions(+), 2 deletions(-)
+ arch/x86/include/asm/kvm_host.h |   3 +
+ arch/x86/include/asm/svm.h      |   3 +
+ arch/x86/kvm/svm/sev.c          | 133 ++++++++++++++++++++++++++++++++
+ arch/x86/kvm/svm/svm.c          |   7 +-
+ arch/x86/kvm/svm/svm.h          |  16 +++-
+ arch/x86/kvm/x86.c              |  11 ++-
+ 6 files changed, 170 insertions(+), 3 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index 5ac1ff097e8c..8773c1f9e45e 100644
+index 117e2e08d7ed..881e05b3f74e 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
-@@ -1561,6 +1561,8 @@ void kvm_mmu_zap_all(struct kvm *kvm);
- void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
- unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
- void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
-+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-+
- 
- int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
- 
-diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
-index 7c4fac53183d..f767a52f9178 100644
---- a/arch/x86/kvm/mmu.h
-+++ b/arch/x86/kvm/mmu.h
-@@ -228,8 +228,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- 	return -(u32)fault & errcode;
+@@ -91,6 +91,7 @@
+ #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
+ #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+ 	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
++#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE	KVM_ARCH_REQ(31)
+ 
+ #define CR0_RESERVED_BITS                                               \
+ 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+@@ -1402,6 +1403,8 @@ struct kvm_x86_ops {
+ 
+ 	int (*handle_rmp_page_fault)(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
+ 			int level, u64 error_code);
++
++	void (*update_protected_guest_state)(struct kvm_vcpu *vcpu);
+ };
+ 
+ struct kvm_x86_nested_ops {
+diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
+index 5e72faa00cf2..6634a952563e 100644
+--- a/arch/x86/include/asm/svm.h
++++ b/arch/x86/include/asm/svm.h
+@@ -220,6 +220,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
+ #define SVM_SEV_FEATURES_DEBUG_SWAP		BIT(5)
+ #define SVM_SEV_FEATURES_PREVENT_HOST_IBS	BIT(6)
+ #define SVM_SEV_FEATURES_BTB_ISOLATION		BIT(7)
++#define SVM_SEV_FEATURES_INT_INJ_MODES			\
++	(SVM_SEV_FEATURES_RESTRICTED_INJECTION |	\
++	 SVM_SEV_FEATURES_ALTERNATE_INJECTION)
+ 
+ struct vmcb_seg {
+ 	u16 selector;
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index d8ad6dd58c87..95f5d25b4f08 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -582,6 +582,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ 
+ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+ {
++	struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ 	struct sev_es_save_area *save = svm->vmsa;
+ 
+ 	/* Check some debug related fields before encrypting the VMSA */
+@@ -625,6 +626,12 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+ 	if (sev_snp_guest(svm->vcpu.kvm))
+ 		save->sev_features |= SVM_SEV_FEATURES_SNP_ACTIVE;
+ 
++	/*
++	 * Save the VMSA synced SEV features. For now, they are the same for
++	 * all vCPUs, so just save each time.
++	 */
++	sev->sev_features = save->sev_features;
++
+ 	return 0;
  }
  
--void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
--
- int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
- 
- int kvm_mmu_post_init_vm(struct kvm *kvm);
-diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
-index e660d832e235..56a7da49092d 100644
---- a/arch/x86/kvm/mmu/mmu.c
-+++ b/arch/x86/kvm/mmu/mmu.c
-@@ -5748,6 +5748,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
- 
- 	return need_tlb_flush;
+@@ -2682,6 +2689,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+ 		if (!ghcb_sw_scratch_is_valid(ghcb))
+ 			goto vmgexit_err;
+ 		break;
++	case SVM_VMGEXIT_AP_CREATION:
++		if (!ghcb_rax_is_valid(ghcb))
++			goto vmgexit_err;
++		break;
+ 	case SVM_VMGEXIT_NMI_COMPLETE:
+ 	case SVM_VMGEXIT_AP_HLT_LOOP:
+ 	case SVM_VMGEXIT_AP_JUMP_TABLE:
+@@ -3395,6 +3406,121 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
+ 	return ret;
  }
-+EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
- 
- void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- 				   const struct kvm_memory_slot *memslot)
+ 
++void sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
++{
++	struct vcpu_svm *svm = to_svm(vcpu);
++	kvm_pfn_t pfn;
++
++	mutex_lock(&svm->snp_vmsa_mutex);
++
++	vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
++
++	/* Clear use of the VMSA in the sev_es_init_vmcb() path */
++	svm->vmsa_pa = 0;
++
++	/* Clear use of the VMSA from the VMCB */
++	svm->vmcb->control.vmsa_pa = 0;
++
++	/* Un-pin previous VMSA */
++	if (svm->snp_vmsa_pfn) {
++		kvm_release_pfn_dirty(svm->snp_vmsa_pfn);
++		svm->snp_vmsa_pfn = 0;
++	}
++
++	if (svm->snp_vmsa_gpa) {
++		/* Validate that the GPA is page aligned */
++		if (!PAGE_ALIGNED(svm->snp_vmsa_gpa))
++			goto e_unlock;
++
++		/*
++		 * The VMSA is referenced by the hypervisor physical address,
++		 * so retrieve the PFN and pin it.
++		 */
++		pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(svm->snp_vmsa_gpa));
++		if (is_error_pfn(pfn))
++			goto e_unlock;
++
++		svm->snp_vmsa_pfn = pfn;
++
++		/* Use the new VMSA in the sev_es_init_vmcb() path */
++		svm->vmsa_pa = pfn_to_hpa(pfn);
++		svm->vmcb->control.vmsa_pa = svm->vmsa_pa;
++
++		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
++	} else {
++		vcpu->arch.pv.pv_unhalted = false;
++		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
++	}
++
++e_unlock:
++	mutex_unlock(&svm->snp_vmsa_mutex);
++}
++
++static void sev_snp_ap_creation(struct vcpu_svm *svm)
++{
++	struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
++	struct kvm_vcpu *vcpu = &svm->vcpu;
++	struct kvm_vcpu *target_vcpu;
++	struct vcpu_svm *target_svm;
++	unsigned int request;
++	unsigned int apic_id;
++	bool kick;
++
++	request = lower_32_bits(svm->vmcb->control.exit_info_1);
++	apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
++
++	/* Validate the APIC ID */
++	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id);
++	if (!target_vcpu)
++		return;
++
++	target_svm = to_svm(target_vcpu);
++
++	kick = true;
++
++	mutex_lock(&target_svm->snp_vmsa_mutex);
++
++	target_svm->snp_vmsa_gpa = 0;
++	target_svm->snp_vmsa_update_on_init = false;
++
++	/* Interrupt injection mode shouldn't change for AP creation */
++	if (request < SVM_VMGEXIT_AP_DESTROY) {
++		u64 sev_features;
++
++		sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
++		sev_features ^= sev->sev_features;
++		if (sev_features & SVM_SEV_FEATURES_INT_INJ_MODES) {
++			vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#lx] from guest\n",
++				    vcpu->arch.regs[VCPU_REGS_RAX]);
++			goto out;
++		}
++	}
++
++	switch (request) {
++	case SVM_VMGEXIT_AP_CREATE_ON_INIT:
++		kick = false;
++		target_svm->snp_vmsa_update_on_init = true;
++		fallthrough;
++	case SVM_VMGEXIT_AP_CREATE:
++		target_svm->snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
++		break;
++	case SVM_VMGEXIT_AP_DESTROY:
++		break;
++	default:
++		vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n",
++			    request);
++		break;
++	}
++
++out:
++	mutex_unlock(&target_svm->snp_vmsa_mutex);
++
++	if (kick) {
++		kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
++		kvm_vcpu_kick(target_vcpu);
++	}
++}
++
+ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ {
+ 	struct vcpu_svm *svm = to_svm(vcpu);
+@@ -3523,6 +3649,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ 		ret = 1;
+ 		break;
+ 	}
++	case SVM_VMGEXIT_AP_CREATION:
++		sev_snp_ap_creation(svm);
++
++		ret = 1;
++		break;
+ 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+ 		vcpu_unimpl(vcpu,
+ 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+@@ -3597,6 +3728,8 @@ void sev_es_create_vcpu(struct vcpu_svm *svm)
+ 	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+ 					    GHCB_VERSION_MIN,
+ 					    sev_enc_bit));
++
++	mutex_init(&svm->snp_vmsa_mutex);
+ }
+ 
+ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 74bc635c9608..078a569c85a8 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -1304,7 +1304,10 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ 	svm->spec_ctrl = 0;
+ 	svm->virt_spec_ctrl = 0;
+ 
+-	if (!init_event) {
++	if (init_event && svm->snp_vmsa_update_on_init) {
++		svm->snp_vmsa_update_on_init = false;
++		sev_snp_update_protected_guest_state(vcpu);
++	} else {
+ 		vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ 				       MSR_IA32_APICBASE_ENABLE;
+ 		if (kvm_vcpu_is_reset_bsp(vcpu))
+@@ -4588,6 +4591,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+ 	.write_page_begin = sev_snp_write_page_begin,
+ 
+ 	.handle_rmp_page_fault = snp_handle_rmp_page_fault,
++
++	.update_protected_guest_state = sev_snp_update_protected_guest_state,
+ };
+ 
+ static struct kvm_x86_init_ops svm_init_ops __initdata = {
+diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
+index 285d9b97b4d2..f9d25d944f26 100644
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -60,18 +60,26 @@ struct kvm_sev_info {
+ 	bool active;		/* SEV enabled guest */
+ 	bool es_active;		/* SEV-ES enabled guest */
+ 	bool snp_active;	/* SEV-SNP enabled guest */
++
+ 	unsigned int asid;	/* ASID used for this guest */
+ 	unsigned int handle;	/* SEV firmware handle */
+ 	int fd;			/* SEV device fd */
++
+ 	unsigned long pages_locked; /* Number of pages locked */
+ 	struct list_head regions_list;  /* List of registered regions */
++
+ 	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
++
+ 	struct kvm *enc_context_owner; /* Owner of copied encryption context */
++
+ 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
++
+ 	void *snp_context;      /* SNP guest context page */
+ 	void *snp_resp_page;	/* SNP guest response page */
+ 	struct ratelimit_state snp_guest_msg_rs; /* Rate limit the SNP guest message */
+ 	void *snp_certs_data;
++
++	u64 sev_features;	/* Features set at VMSA creation */
+ };
+ 
+ struct kvm_svm {
+@@ -192,6 +200,11 @@ struct vcpu_svm {
+ 	bool guest_state_loaded;
+ 
+ 	u64 ghcb_registered_gpa;
++
++	struct mutex snp_vmsa_mutex;
++	gpa_t snp_vmsa_gpa;
++	kvm_pfn_t snp_vmsa_pfn;
++	bool snp_vmsa_update_on_init;	/* SEV-SNP AP Creation on INIT-SIPI */
+ };
+ 
+ struct svm_cpu_data {
+@@ -555,7 +568,7 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
+ #define GHCB_VERSION_MAX	2ULL
+ #define GHCB_VERSION_MIN	1ULL
+ 
+-#define GHCB_HV_FT_SUPPORTED	GHCB_HV_FT_SNP
++#define GHCB_HV_FT_SUPPORTED	(GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
+ 
+ extern unsigned int max_sev_asid;
+ 
+@@ -584,6 +597,7 @@ int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level);
+ void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
+ int snp_handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
+ 			      int level, u64 error_code);
++void sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu);
+ 
+ /* vmenter.S */
+ 
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 1398b8021982..e9fd59913bc2 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9279,6 +9279,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ 
+ 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
+ 			static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
++
++		if (kvm_check_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) {
++			kvm_x86_ops.update_protected_guest_state(vcpu);
++			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) {
++				r = 1;
++				goto out;
++			}
++		}
+ 	}
+ 
+ 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
+@@ -11236,7 +11244,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+ 	if (!list_empty_careful(&vcpu->async_pf.done))
+ 		return true;
+ 
+-	if (kvm_apic_has_events(vcpu))
++	if (kvm_apic_has_events(vcpu) ||
++	    kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
+ 		return true;
+ 
+ 	if (vcpu->arch.pv.pv_unhalted)
 -- 
 2.17.1
 
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help