x86/sev: Use SEV-SNP AP creation to start secondary CPUs
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index bf4b578..d7915ae 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -18,6 +18,7 @@
 #include <linux/memblock.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/cpumask.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/svm.h>
 #include <asm/smp.h>
 #include <asm/cpu.h>
+#include <asm/apic.h>
 
 #define DR7_RESET_VALUE        0x400
 
+/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
+#define AP_INIT_CS_LIMIT               0xffff
+#define AP_INIT_DS_LIMIT               0xffff
+#define AP_INIT_LDTR_LIMIT             0xffff
+#define AP_INIT_GDTR_LIMIT             0xffff
+#define AP_INIT_IDTR_LIMIT             0xffff
+#define AP_INIT_TR_LIMIT               0xffff
+#define AP_INIT_RFLAGS_DEFAULT         0x2
+#define AP_INIT_DR6_DEFAULT            0xffff0ff0
+#define AP_INIT_GPAT_DEFAULT           0x0007040600070406ULL
+#define AP_INIT_XCR0_DEFAULT           0x1
+#define AP_INIT_X87_FTW_DEFAULT                0x5555
+#define AP_INIT_X87_FCW_DEFAULT                0x0040
+#define AP_INIT_CR0_DEFAULT            0x60000010
+#define AP_INIT_MXCSR_DEFAULT          0x1f80
+
 /* For early boot hypervisor communication in SEV-ES enabled guests */
 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
 
@@ -90,6 +108,8 @@ struct ghcb_state {
 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
 DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
 
+static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+
 static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
        unsigned long sp = regs->sp;
@@ -823,6 +843,228 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
        pvalidate_pages(vaddr, npages, true);
 }
 
+static int snp_set_vmsa(void *va, bool vmsa)
+{
+       u64 attrs;
+
+       /*
+        * Running at VMPL0 allows the kernel to change the VMSA bit for a page
+        * using the RMPADJUST instruction. However, for the instruction to
+        * succeed it must target the permissions of a lesser privileged
+        * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
+        * instruction in the AMD64 APM Volume 3).
+        */
+       attrs = 1;
+       if (vmsa)
+               attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+       return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+}
+
+#define __ATTR_BASE            (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
+#define INIT_CS_ATTRIBS                (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
+#define INIT_DS_ATTRIBS                (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
+
+#define INIT_LDTR_ATTRIBS      (SVM_SELECTOR_P_MASK | 2)
+#define INIT_TR_ATTRIBS                (SVM_SELECTOR_P_MASK | 3)
+
+static void *snp_alloc_vmsa_page(void)
+{
+       struct page *p;
+
+       /*
+        * Allocate VMSA page to work around the SNP erratum where the CPU will
+        * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
+        * collides with the RMP entry of VMSA page. The recommended workaround
+        * is to not use a large page.
+        *
+        * Allocate an 8k page which is also 8k-aligned.
+        */
+       p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+       if (!p)
+               return NULL;
+
+       split_page(p, 1);
+
+       /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
+       __free_page(p);
+
+       return page_address(p + 1);
+}
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
+{
+       int err;
+
+       err = snp_set_vmsa(vmsa, false);
+       if (err)
+               pr_err("clear VMSA page failed (%u), leaking page\n", err);
+       else
+               free_page((unsigned long)vmsa);
+}
+
+static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
+{
+       struct sev_es_save_area *cur_vmsa, *vmsa;
+       struct ghcb_state state;
+       unsigned long flags;
+       struct ghcb *ghcb;
+       u8 sipi_vector;
+       int cpu, ret;
+       u64 cr4;
+
+       /*
+        * The hypervisor SNP feature support check has happened earlier, just check
+        * the AP_CREATION one here.
+        */
+       if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
+               return -EOPNOTSUPP;
+
+       /*
+        * Verify the desired start IP against the known trampoline start IP
+        * to catch any future new trampolines that may be introduced that
+        * would require a new protected guest entry point.
+        */
+       if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
+                     "Unsupported SNP start_ip: %lx\n", start_ip))
+               return -EINVAL;
+
+       /* Override start_ip with known protected guest start IP */
+       start_ip = real_mode_header->sev_es_trampoline_start;
+
+       /* Find the logical CPU for the APIC ID */
+       for_each_present_cpu(cpu) {
+               if (arch_match_cpu_phys_id(cpu, apic_id))
+                       break;
+       }
+       if (cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       cur_vmsa = per_cpu(sev_vmsa, cpu);
+
+       /*
+        * A new VMSA is created each time because there is no guarantee that
+        * the current VMSA is the kernel's or that the vCPU is not running. If
+        * an attempt were made to use the current VMSA with a running vCPU, a
+        * #VMEXIT of that vCPU would wipe out all of the settings being done
+        * here.
+        */
+       vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page();
+       if (!vmsa)
+               return -ENOMEM;
+
+       /* CR4 should maintain the MCE value */
+       cr4 = native_read_cr4() & X86_CR4_MCE;
+
+       /* Set the CS value based on the start_ip converted to a SIPI vector */
+       sipi_vector             = (start_ip >> 12);
+       vmsa->cs.base           = sipi_vector << 12;
+       vmsa->cs.limit          = AP_INIT_CS_LIMIT;
+       vmsa->cs.attrib         = INIT_CS_ATTRIBS;
+       vmsa->cs.selector       = sipi_vector << 8;
+
+       /* Set the RIP value based on start_ip */
+       vmsa->rip               = start_ip & 0xfff;
+
+       /* Set AP INIT defaults as documented in the APM */
+       vmsa->ds.limit          = AP_INIT_DS_LIMIT;
+       vmsa->ds.attrib         = INIT_DS_ATTRIBS;
+       vmsa->es                = vmsa->ds;
+       vmsa->fs                = vmsa->ds;
+       vmsa->gs                = vmsa->ds;
+       vmsa->ss                = vmsa->ds;
+
+       vmsa->gdtr.limit        = AP_INIT_GDTR_LIMIT;
+       vmsa->ldtr.limit        = AP_INIT_LDTR_LIMIT;
+       vmsa->ldtr.attrib       = INIT_LDTR_ATTRIBS;
+       vmsa->idtr.limit        = AP_INIT_IDTR_LIMIT;
+       vmsa->tr.limit          = AP_INIT_TR_LIMIT;
+       vmsa->tr.attrib         = INIT_TR_ATTRIBS;
+
+       vmsa->cr4               = cr4;
+       vmsa->cr0               = AP_INIT_CR0_DEFAULT;
+       vmsa->dr7               = DR7_RESET_VALUE;
+       vmsa->dr6               = AP_INIT_DR6_DEFAULT;
+       vmsa->rflags            = AP_INIT_RFLAGS_DEFAULT;
+       vmsa->g_pat             = AP_INIT_GPAT_DEFAULT;
+       vmsa->xcr0              = AP_INIT_XCR0_DEFAULT;
+       vmsa->mxcsr             = AP_INIT_MXCSR_DEFAULT;
+       vmsa->x87_ftw           = AP_INIT_X87_FTW_DEFAULT;
+       vmsa->x87_fcw           = AP_INIT_X87_FCW_DEFAULT;
+
+       /* SVME must be set. */
+       vmsa->efer              = EFER_SVME;
+
+       /*
+        * Set the SNP-specific fields for this VMSA:
+        *   VMPL level
+        *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
+        */
+       vmsa->vmpl              = 0;
+       vmsa->sev_features      = sev_status >> 2;
+
+       /* Switch the page over to a VMSA page now that it is initialized */
+       ret = snp_set_vmsa(vmsa, true);
+       if (ret) {
+               pr_err("set VMSA page failed (%u)\n", ret);
+               free_page((unsigned long)vmsa);
+
+               return -EINVAL;
+       }
+
+       /* Issue VMGEXIT AP Creation NAE event */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+
+       vc_ghcb_invalidate(ghcb);
+       ghcb_set_rax(ghcb, vmsa->sev_features);
+       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+       ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
+       ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+       sev_es_wr_ghcb_msr(__pa(ghcb));
+       VMGEXIT();
+
+       if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+           lower_32_bits(ghcb->save.sw_exit_info_1)) {
+               pr_err("SNP AP Creation error\n");
+               ret = -EINVAL;
+       }
+
+       __sev_put_ghcb(&state);
+
+       local_irq_restore(flags);
+
+       /* Perform cleanup if there was an error */
+       if (ret) {
+               snp_cleanup_vmsa(vmsa);
+               vmsa = NULL;
+       }
+
+       /* Free up any previous VMSA page */
+       if (cur_vmsa)
+               snp_cleanup_vmsa(cur_vmsa);
+
+       /* Record the current VMSA page */
+       per_cpu(sev_vmsa, cpu) = vmsa;
+
+       return ret;
+}
+
+void snp_set_wakeup_secondary_cpu(void)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       /*
+        * Always set this override if SNP is enabled. This makes it the
+        * required method to start APs under SNP. If the hypervisor does
+        * not support AP creation, then no APs will be started.
+        */
+       apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
+}
+
 int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
 {
        u16 startup_cs, startup_ip;
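
Side note, not part of the patch above: the CS/RIP setup in wakeup_cpu_via_vmgexit() follows the real-mode SIPI convention, where the 4K page number of the start IP becomes the startup vector, that vector determines the CS base and selector, and the in-page offset becomes RIP. The standalone sketch below just works through that arithmetic; the start_ip value is a made-up example address, not taken from the kernel.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical 4K-aligned real-mode trampoline address, for illustration only */
	unsigned long start_ip = 0x9a000;

	/* As in the patch: the SIPI vector is the 4K page number of the start IP */
	uint8_t sipi_vector = start_ip >> 12;

	printf("sipi_vector = 0x%x\n", sipi_vector);        /* 0x9a */
	printf("cs.base     = 0x%x\n", sipi_vector << 12);  /* 0x9a000, real-mode segment base */
	printf("cs.selector = 0x%x\n", sipi_vector << 8);   /* 0x9a00, i.e. base >> 4 */
	printf("rip         = 0x%lx\n", start_ip & 0xfff);  /* 0x0, offset within the 4K page */

	return 0;
}

Built with any C compiler, this prints the same CS/RIP values that the patch would program into the new VMSA for a trampoline at that example address.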