From 21823fbda552252271c948850f80f15edfdf25b6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 9 Jun 2021 16:42:24 -0700
Subject: [PATCH] KVM: x86: Invalidate all PGDs for the current PCID on MOV CR3 w/ flush

Flush and sync all PGDs for the current/target PCID on MOV CR3 with a
TLB flush, i.e. without PCID_NOFLUSH set.  Paraphrasing Intel's SDM
regarding the behavior of MOV to CR3:

  - If CR4.PCIDE = 0, invalidates all TLB entries associated with PCID
    000H and all entries in all paging-structure caches associated with
    PCID 000H.

  - If CR4.PCIDE = 1 and NOFLUSH=0, invalidates all TLB entries
    associated with the PCID specified in bits 11:0, and all entries in
    all paging-structure caches associated with that PCID.  It is not
    required to invalidate entries in the TLBs and paging-structure
    caches that are associated with other PCIDs.

  - If CR4.PCIDE=1 and NOFLUSH=1, is not required to invalidate any TLB
    entries or entries in paging-structure caches.

Extract and reuse the logic for INVPCID(single), which is effectively
the same flow and works even if CR4.PCIDE=0, as the current PCID will
be '0' in that case, thus honoring the requirement of flushing PCID=0.

Continue passing skip_tlb_flush to kvm_mmu_new_pgd() even though it
_should_ be redundant; the cleanup will be done in a future patch.  The
overhead of an unnecessary nop sync is minimal (especially compared to
the actual sync), and the TLB flush is handled via request.  Avoiding
the negligible overhead is not worth the risk of breaking kernels that
backport the fix.

Fixes: 956bf3531fba ("kvm: x86: Skip shadow page resync on CR3 switch when indicated by guest")
Cc: Junaid Shahid
Signed-off-by: Sean Christopherson
Message-Id: <20210609234235.1244004-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 69 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efcdd1f46d64..8ed5f3252e9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1062,26 +1062,46 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
+static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
+{
+        struct kvm_mmu *mmu = vcpu->arch.mmu;
+        unsigned long roots_to_free = 0;
+        int i;
+
+        /*
+         * If neither the current CR3 nor any of the prev_roots use the given
+         * PCID, then nothing needs to be done here because a resync will
+         * happen anyway before switching to any other CR3.
+         */
+        if (kvm_get_active_pcid(vcpu) == pcid) {
+                kvm_mmu_sync_roots(vcpu);
+                kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+        }
+
+        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
+                        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+        kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+}
+
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
         bool skip_tlb_flush = false;
+        unsigned long pcid = 0;
 #ifdef CONFIG_X86_64
         bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 
         if (pcid_enabled) {
                 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
                 cr3 &= ~X86_CR3_PCID_NOFLUSH;
+                pcid = cr3 & X86_CR3_PCID_MASK;
         }
 #endif
 
         /* PDPTRs are always reloaded for PAE paging. */
-        if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu)) {
-                if (!skip_tlb_flush) {
-                        kvm_mmu_sync_roots(vcpu);
-                        kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-                }
-                return 0;
-        }
+        if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
+                goto handle_tlb_flush;
 
         /*
          * Do not condition the GPA check on long mode, this helper is used to
@@ -1094,10 +1114,23 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
         if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                 return 1;
 
-        kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+        if (cr3 != kvm_read_cr3(vcpu))
+                kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+
         vcpu->arch.cr3 = cr3;
         kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
+handle_tlb_flush:
+        /*
+         * A load of CR3 that flushes the TLB flushes only the current PCID,
+         * even if PCID is disabled, in which case PCID=0 is flushed.  It's a
+         * moot point in the end because _disabling_ PCID will flush all PCIDs,
+         * and it's impossible to use a non-zero PCID when PCID is disabled,
+         * i.e. only PCID=0 can be relevant.
+         */
+        if (!skip_tlb_flush)
+                kvm_invalidate_pcid(vcpu, pcid);
+
         return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -11952,8 +11985,6 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 {
         bool pcid_enabled;
         struct x86_exception e;
-        unsigned i;
-        unsigned long roots_to_free = 0;
         struct {
                 u64 pcid;
                 u64 gla;
@@ -11987,23 +12018,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
                         return 1;
                 }
 
-                if (kvm_get_active_pcid(vcpu) == operand.pcid) {
-                        kvm_mmu_sync_roots(vcpu);
-                        kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-                }
-
-                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-                        if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
-                            == operand.pcid)
-                                roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
-
-                kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
-                /*
-                 * If neither the current cr3 nor any of the prev_roots use the
-                 * given PCID, then nothing needs to be done here because a
-                 * resync will happen anyway before switching to any other CR3.
-                 */
-
+                kvm_invalidate_pcid(vcpu, operand.pcid);
                 return kvm_skip_emulated_instruction(vcpu);
 
         case INVPCID_TYPE_ALL_NON_GLOBAL:
-- 
2.11.0
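
For reference, a minimal standalone sketch (not part of the patch) of the CR3 decoding the changelog describes: bits 11:0 carry the PCID and bit 63 carries the NOFLUSH hint, which kvm_set_cr3() masks off before using the value.  The CR3_PCID_* macros and the sample CR3 below are hypothetical stand-ins for the kernel's X86_CR3_PCID_MASK and X86_CR3_PCID_NOFLUSH definitions.

/*
 * Illustrative only, not from the patch: decode the PCID (bits 11:0) and
 * NOFLUSH (bit 63) fields of a CR3 value, per the SDM behavior paraphrased
 * in the changelog.  CR3_PCID_MASK/CR3_PCID_NOFLUSH mirror, but are not,
 * the kernel's X86_CR3_PCID_MASK/X86_CR3_PCID_NOFLUSH.
 */
#include <stdint.h>
#include <stdio.h>

#define CR3_PCID_MASK     0xfffull        /* bits 11:0 select the PCID */
#define CR3_PCID_NOFLUSH  (1ull << 63)    /* bit 63: do not flush the TLB */

int main(void)
{
        uint64_t cr3 = 0x8000000012345001ull;    /* hypothetical guest value */
        uint64_t pcid = cr3 & CR3_PCID_MASK;
        int noflush = !!(cr3 & CR3_PCID_NOFLUSH);

        /* With NOFLUSH=0, a CR3 load must flush this PCID (PCID 0 if CR4.PCIDE=0). */
        printf("pcid=0x%llx noflush=%d\n", (unsigned long long)pcid, noflush);
        return 0;
}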