Re: [PATCH v1 21/26] crypto: ccp: Add panic notifier for SEV/SNP firmware shutdown on kdump
From: Michael Roth <hidden>
Date: 2024-01-26 13:40:07
Also in:
kvm, linux-crypto, linux-mm, lkml
On Sun, Jan 21, 2024 at 12:49:00PM +0100, Borislav Petkov wrote:
On Sat, Dec 30, 2023 at 10:19:49AM -0600, Michael Roth wrote:
From: Ashish Kalra <ashish.kalra@amd.com> Add a kdump-safe version of sev_firmware_shutdown() registered as a crash_kexec_post_notifier, which is invoked during panic/crash to do SEV/SNP shutdown. This is required for transitioning all IOMMU pages to reclaim/hypervisor state, otherwise re-init of IOMMU pages during crashdump kernel boot fails and panics the crashdump kernel. This panic notifier runs in atomic context, hence it takes care not to acquire any locks/mutexes and polls for PSP command completion instead of depending on the PSP command completion interrupt. Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> [mdr: remove use of "we" in comments] Signed-off-by: Michael Roth <redacted> Cleanups on top, see if the below works too. Especially: * I've zapped the WBINVD before the TMR pages are freed because __sev_snp_shutdown_locked() will WBINVD anyway.
As Ashish mentioned, the wbinvd_on_all_cpus() is still needed for general TMR handling even outside of panic/SNP. But I think you're right about the panic==true case, where the handling is SNP-specific and the wbinvd() that gets called later during SNP shutdown will take care of it, so I've adjusted things accordingly.
* The mutex_is_locked() check in snp_shutdown_on_panic() is silly because the panic notifier runs on one CPU anyway.
Based on discussion with Ashish, I've updated the comments in v2 to make it a little clearer why it might still be a good idea to keep that check: it avoids weird, unexpected behavior if we try to issue PSP commands while another one, issued by another task before the panic, is still in flight. But I squashed in all the other changes here as-is. -Mike
quoted hunk ↗ jump to hunk
Thx. ---diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 435ba9bc4510..27323203e593 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h@@ -227,6 +227,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); +void kdump_sev_callback(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { }@@ -255,6 +256,7 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } +static inline void kdump_sev_callback(void) { } #endif #ifdef CONFIG_KVM_AMD_SEVdiff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 23ede774d31b..64ae3a1e5c30 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c@@ -40,6 +40,7 @@ #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> +#include <asm/sev.h> /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data {@@ -59,12 +60,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) */ cpu_emergency_stop_pt(); - /* - * for SNP do wbinvd() on remote CPUs to - * safely do SNP_SHUTDOWN on the local CPU. 
- */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); + kdump_sev_callback(); disable_local_APIC(); }diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c67285824e82..dbb2cc6b5666 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c@@ -2262,3 +2262,13 @@ static int __init snp_init_platform_device(void) return 0; } device_initcall(snp_init_platform_device); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the the local CPU. + */ + if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + wbinvd(); +}diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 598878e760bc..c342e5e54e45 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c@@ -161,7 +161,6 @@ static int sev_wait_cmd_ioc(struct sev_device *sev, udelay(10); } - return -ETIMEDOUT; }@@ -1654,7 +1653,7 @@ static int sev_update_firmware(struct device *dev) return ret; } -static int __sev_snp_shutdown_locked(int *error, bool in_panic) +static int __sev_snp_shutdown_locked(int *error, bool panic) { struct sev_device *sev = psp_master->sev_data; struct sev_data_snp_shutdown_ex data;@@ -1673,7 +1672,7 @@ static int __sev_snp_shutdown_locked(int *error, bool in_panic) * In that case, a wbinvd() is done on remote CPUs via the NMI * callback, so only a local wbinvd() is needed here. */ - if (!in_panic) + if (!panic) wbinvd_on_all_cpus(); else wbinvd();@@ -2199,26 +2198,13 @@ int sev_dev_init(struct psp_device *psp) return ret; } -static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic) +static void __sev_firmware_shutdown(struct sev_device *sev, bool panic) { int error; __sev_platform_shutdown_locked(NULL); if (sev_es_tmr) { - /* - * The TMR area was encrypted, flush it from the cache - * - * If invoked during panic handling, local interrupts are - * disabled and all CPUs are stopped, so wbinvd_on_all_cpus() - * can't be used. 
In that case, wbinvd() is done on remote CPUs - * via the NMI callback, so a local wbinvd() is sufficient here. - */ - if (!in_panic) - wbinvd_on_all_cpus(); - else - wbinvd(); - __snp_free_firmware_pages(virt_to_page(sev_es_tmr), get_order(sev_es_tmr_size), true);@@ -2237,7 +2223,7 @@ static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic) snp_range_list = NULL; } - __sev_snp_shutdown_locked(&error, in_panic); + __sev_snp_shutdown_locked(&error, panic); } static void sev_firmware_shutdown(struct sev_device *sev)@@ -2262,26 +2248,18 @@ void sev_dev_destroy(struct psp_device *psp) psp_clear_sev_irq_handler(psp); } -static int sev_snp_shutdown_on_panic(struct notifier_block *nb, - unsigned long reason, void *arg) +static int snp_shutdown_on_panic(struct notifier_block *nb, + unsigned long reason, void *arg) { struct sev_device *sev = psp_master->sev_data; - /* - * Panic callbacks are executed with all other CPUs stopped, - * so don't wait for sev_cmd_mutex to be released since it - * would block here forever. - */ - if (mutex_is_locked(&sev_cmd_mutex)) - return NOTIFY_DONE; - __sev_firmware_shutdown(sev, true); return NOTIFY_DONE; } -static struct notifier_block sev_snp_panic_notifier = { - .notifier_call = sev_snp_shutdown_on_panic, +static struct notifier_block snp_panic_notifier = { + .notifier_call = snp_shutdown_on_panic, }; int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,@@ -2322,7 +2300,7 @@ void sev_pci_init(void) "-SNP" : "", sev->api_major, sev->api_minor, sev->build); atomic_notifier_chain_register(&panic_notifier_list, - &sev_snp_panic_notifier); + &snp_panic_notifier); return; err:@@ -2339,5 +2317,5 @@ void sev_pci_exit(void) sev_firmware_shutdown(sev); atomic_notifier_chain_unregister(&panic_notifier_list, - &sev_snp_panic_notifier); + &snp_panic_notifier); }-- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette