Re: [PATCH v6 19/42] x86/mm: Add support to validate memory when changing C-bit
From: Borislav Petkov <bp@alien8.de>
Date: 2021-11-09 19:34:18
Also in:
kvm, linux-efi, linux-mm, lkml, platform-driver-x86
On Fri, Oct 08, 2021 at 01:04:30PM -0500, Brijesh Singh wrote:
+static int vmgexit_psc(struct snp_psc_desc *desc)
+{
+ int cur_entry, end_entry, ret;
+ struct snp_psc_desc *data;
+ struct ghcb_state state;
+ struct ghcb *ghcb;
+ struct psc_hdr *hdr;
+ unsigned long flags;
int cur_entry, end_entry, ret;
struct snp_psc_desc *data;
struct ghcb_state state;
struct psc_hdr *hdr;
unsigned long flags;
struct ghcb *ghcb;
that's properly sorted.
+ + local_irq_save(flags);
What is that protecting against? Comment about it? Aha, __sev_get_ghcb() needs to run with IRQs disabled because it is using the per-CPU GHCB.
+
+ ghcb = __sev_get_ghcb(&state);
+ if (unlikely(!ghcb))
+ panic("SEV-SNP: Failed to get GHCB\n");
+
+ /* Copy the input desc into GHCB shared buffer */
+ data = (struct snp_psc_desc *)ghcb->shared_buffer;
+ memcpy(ghcb->shared_buffer, desc, sizeof(*desc));That shared buffer has a size - check it vs the size of the desc thing.
+ + hdr = &data->hdr;
Why do you need this and why can't you use data->hdr simply? /me continues reading and realizes why Oh no, this is tricky. The HV call will modify what @data points to and thus @hdr will point to new contents. Only then your backwards processing check below makes sense. So then you *absoulutely* want to use data->hdr everywhere and then also write why in the comment above the check that data gets updated by the HV call.
+ cur_entry = hdr->cur_entry; + end_entry = hdr->end_entry; + + /* + * As per the GHCB specification, the hypervisor can resume the guest + * before processing all the entries. Checks whether all the entries
Check
+ * are processed. If not, then keep retrying. + * + * The stragtegy here is to wait for the hypervisor to change the page + * state in the RMP table before guest access the memory pages. If the
accesses
+ * page state was not successful, then later memory access will result
"If the page state *change* was not ..."
+ * in the crash.
"in a crash."
+ */
+ while (hdr->cur_entry <= hdr->end_entry) {
+ ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
+
+ ret = sev_es_ghcb_hv_call(ghcb, NULL, SVM_VMGEXIT_PSC, 0, 0);This should be ret = sev_es_ghcb_hv_call(ghcb, true, NULL, SVM_VMGEXIT_PSC, 0, 0); as we changed it in the meantime to accomodate HyperV isolation VMs.
+
+ /*
+ * Page State Change VMGEXIT can pass error code through
+ * exit_info_2.
+ */
+ if (WARN(ret || ghcb->save.sw_exit_info_2,
+ "SEV-SNP: PSC failed ret=%d exit_info_2=%llx\n",
+ ret, ghcb->save.sw_exit_info_2)) {
+ ret = 1;That ret = 1 goes unused with that "return 0" at the end. It should be "return ret" at the end.. Ditto for the others. Audit all your exit paths in this function.
+ goto out;
+ }
+
+ /*
+ * Sanity check that entry processing is not going backward.
+ * This will happen only if hypervisor is tricking us.
+ */
+ if (WARN(hdr->end_entry > end_entry || cur_entry > hdr->cur_entry,
+"SEV-SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+ end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) {
+ ret = 1;
+ goto out;
+ }
+
+ /* Verify that reserved bit is not set */
+ if (WARN(hdr->reserved, "Reserved bit is set in the PSC header\n")) {Shouldn't that thing happen first after the HV call?
+ ret = 1;
+ goto out;
+ }
+ }
+
+out:
+ __sev_put_ghcb(&state);
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+static void __set_page_state(struct snp_psc_desc *data, unsigned long vaddr,
+ unsigned long vaddr_end, int op)
+{
+ struct psc_hdr *hdr;
+ struct psc_entry *e;
+ unsigned long pfn;
+ int i;
+
+ hdr = &data->hdr;
+ e = data->entries;
+
+ memset(data, 0, sizeof(*data));
+ i = 0;
+
+ while (vaddr < vaddr_end) {
+ if (is_vmalloc_addr((void *)vaddr))
+ pfn = vmalloc_to_pfn((void *)vaddr);
+ else
+ pfn = __pa(vaddr) >> PAGE_SHIFT;
+
+ e->gfn = pfn;
+ e->operation = op;
+ hdr->end_entry = i;
+
+ /*
+ * The GHCB specification provides the flexibility to
+ * use either 4K or 2MB page size in the RMP table.
+ * The current SNP support does not keep track of the
+ * page size used in the RMP table. To avoid the
+ * overlap request,"avoid overlap request"? No clue what that means. In general, that comment is talking about something in the future and is more confusing than explaining stuff.
use the 4K page size in the RMP + * table. + */ + e->pagesize = RMP_PG_SIZE_4K; + + vaddr = vaddr + PAGE_SIZE; + e++; + i++; + } + + if (vmgexit_psc(data)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); +} + +static void set_page_state(unsigned long vaddr, unsigned int npages, int op)
Yeah, so this should be named set_pages_state - notice the plural "pages" because it works on multiple pages, @npages exactly.
+{
+ unsigned long vaddr_end, next_vaddr;
+ struct snp_psc_desc *desc;
+
+ vaddr = vaddr & PAGE_MASK;
+ vaddr_end = vaddr + (npages << PAGE_SHIFT);Take those two...
+
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
+ if (!desc)
+ panic("SEV-SNP: failed to allocate memory for PSC descriptor\n");... and put them here. <---
quoted hunk ↗ jump to hunk
+ + while (vaddr < vaddr_end) { + /* + * Calculate the last vaddr that can be fit in one + * struct snp_psc_desc. + */ + next_vaddr = min_t(unsigned long, vaddr_end, + (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); + + __set_page_state(desc, vaddr, next_vaddr, op); + + vaddr = next_vaddr; + } + + kfree(desc); +} + +void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) +{ + if (!cc_platform_has(CC_ATTR_SEV_SNP)) + return; + + pvalidate_pages(vaddr, npages, 0); + + set_page_state(vaddr, npages, SNP_PAGE_STATE_SHARED); +} + +void snp_set_memory_private(unsigned long vaddr, unsigned int npages) +{ + if (!cc_platform_has(CC_ATTR_SEV_SNP)) + return; + + set_page_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); + + pvalidate_pages(vaddr, npages, 1); +} + int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { u16 startup_cs, startup_ip;diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 527957586f3c..ffe51944606a 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c@@ -30,6 +30,7 @@ #include <asm/proto.h> #include <asm/memtype.h> #include <asm/set_memory.h> +#include <asm/sev.h> #include "../mm_internal.h"@@ -2010,8 +2011,22 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* + * To maintain the security gurantees of SEV-SNP guest invalidate the memory
"guarantees" Your spellchecker broke again.
+ * before clearing the encryption attribute. + */ + if (!enc) + snp_set_memory_shared(addr, numpages); + ret = __change_page_attr_set_clr(&cpa, 1); + /* + * Now that memory is mapped encrypted in the page table, validate it + * so that is consistent with the above page state. + */ + if (!ret && enc) + snp_set_memory_private(addr, numpages); + /* * After changing the encryption attribute, we need to flush TLBs again * in case any speculative TLB caching occurred (but no need to flush -- 2.25.1
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette