From 1e76a3ce0d3cdfc6b506e21047a26471bc1cc92e Mon Sep 17 00:00:00 2001
From: David Stevens
Date: Fri, 15 Oct 2021 12:30:21 -0400
Subject: [PATCH] KVM: cleanup allocation of rmaps and page tracking data

Unify the flags for rmaps and page tracking data, using a single flag
in struct kvm_arch and a single loop to go over all the address spaces
and memslots. This avoids code duplication between
alloc_all_memslots_rmaps and kvm_page_track_enable_mmu_write_tracking.

Signed-off-by: David Stevens
[This patch is the delta between David's v2 and v3, with conflicts
 fixed and my own commit message. - Paolo]
Co-developed-by: Sean Christopherson
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/kvm_host.h       | 17 +++-----
 arch/x86/include/asm/kvm_page_track.h |  3 +-
 arch/x86/kvm/mmu.h                    | 22 +++++++---
 arch/x86/kvm/mmu/mmu.c                | 78 ++++++++++++++++++++++++++++-------
 arch/x86/kvm/mmu/page_track.c         | 57 +++++++------------------
 arch/x86/kvm/mmu/tdp_mmu.h            |  2 -
 arch/x86/kvm/x86.c                    | 47 +--------------------
 7 files changed, 103 insertions(+), 123 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 88f0326c184a..b4fece3bb061 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1212,18 +1212,11 @@ struct kvm_arch {
 #endif /* CONFIG_X86_64 */
 
 	/*
-	 * If set, rmaps have been allocated for all memslots and should be
-	 * allocated for any newly created or modified memslots.
+	 * If set, at least one shadow root has been allocated. This flag
+	 * is used as one input when determining whether certain memslot
+	 * related allocations are necessary.
 	 */
-	bool memslots_have_rmaps;
-
-	/*
-	 * Set when the KVM mmu needs guest write access page tracking. If
-	 * set, the necessary gfn_track arrays have been allocated for
-	 * all memslots and should be allocated for any newly created or
-	 * modified memslots.
-	 */
-	bool memslots_mmu_write_tracking;
+	bool shadow_root_allocated;
 
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t	hv_root_tdp;
@@ -1946,7 +1939,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
-int alloc_all_memslots_rmaps(struct kvm *kvm);
+int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
 
 #define KVM_CLOCK_VALID_FLAGS						\
 	(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 79d84a94f8eb..9d4a3b1b25b9 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -49,7 +49,8 @@ struct kvm_page_track_notifier_node {
 int kvm_page_track_init(struct kvm *kvm);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
-int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm);
+bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
+int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
 int kvm_page_track_create_memslot(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 75367af1a6d3..2df48d60c949 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -304,14 +304,26 @@ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 int kvm_mmu_post_init_vm(struct kvm *kvm);
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
 
-static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
+static inline bool kvm_shadow_root_allocated(struct kvm *kvm)
 {
 	/*
-	 * Read memslot_have_rmaps before rmap pointers. Hence, threads reading
-	 * memslots_have_rmaps in any lock context are guaranteed to see the
-	 * pointers. Pairs with smp_store_release in alloc_all_memslots_rmaps.
+	 * Read shadow_root_allocated before related pointers. Hence, threads
+	 * reading shadow_root_allocated in any lock context are guaranteed to
+	 * see the pointers. Pairs with smp_store_release in
+	 * mmu_first_shadow_root_alloc.
 	 */
-	return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
+	return smp_load_acquire(&kvm->arch.shadow_root_allocated);
+}
+
+#ifdef CONFIG_X86_64
+static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
+#else
+static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
+#endif
+
+static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
+{
+	return !is_tdp_mmu_enabled(kvm) || kvm_shadow_root_allocated(kvm);
 }
 
 static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 29e7a4bb26e9..701db0794581 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3397,6 +3397,67 @@ out_unlock:
 	return r;
 }
 
+static int mmu_first_shadow_root_alloc(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	int r = 0, i;
+
+	/*
+	 * Check if this is the first shadow root being allocated before
+	 * taking the lock.
+	 */
+	if (kvm_shadow_root_allocated(kvm))
+		return 0;
+
+	mutex_lock(&kvm->slots_arch_lock);
+
+	/* Recheck, under the lock, whether this is the first shadow root. */
+	if (kvm_shadow_root_allocated(kvm))
+		goto out_unlock;
+
+	/*
+	 * Check if anything actually needs to be allocated, e.g. all metadata
+	 * will be allocated upfront if TDP is disabled.
+	 */
+	if (kvm_memslots_have_rmaps(kvm) &&
+	    kvm_page_track_write_tracking_enabled(kvm))
+		goto out_success;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(slot, slots) {
+			/*
+			 * Both of these functions are no-ops if the target is
+			 * already allocated, so unconditionally calling both
+			 * is safe. Intentionally do NOT free allocations on
+			 * failure to avoid having to track which allocations
+			 * were made now versus when the memslot was created.
+			 * The metadata is guaranteed to be freed when the slot
+			 * is freed, and will be kept/used if userspace retries
+			 * KVM_RUN instead of killing the VM.
+			 */
+			r = memslot_rmap_alloc(slot, slot->npages);
+			if (r)
+				goto out_unlock;
+			r = kvm_page_track_write_tracking_alloc(slot);
+			if (r)
+				goto out_unlock;
+		}
+	}
+
+	/*
+	 * Ensure that shadow_root_allocated becomes true strictly after
+	 * all the related pointers are set.
+	 */
+out_success:
+	smp_store_release(&kvm->arch.shadow_root_allocated, true);
+
+out_unlock:
+	mutex_unlock(&kvm->slots_arch_lock);
+	return r;
+}
+
 static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -3427,11 +3488,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	r = alloc_all_memslots_rmaps(vcpu->kvm);
-	if (r)
-		return r;
-
-	r = kvm_page_track_enable_mmu_write_tracking(vcpu->kvm);
+	r = mmu_first_shadow_root_alloc(vcpu->kvm);
 	if (r)
 		return r;
 
@@ -5604,16 +5661,7 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
-	if (!kvm_mmu_init_tdp_mmu(kvm))
-		/*
-		 * No smp_load/store wrappers needed here as we are in
-		 * VM init and there cannot be any memslots / other threads
-		 * accessing this struct kvm yet.
-		 */
-		kvm->arch.memslots_have_rmaps = true;
-
-	if (!tdp_enabled)
-		kvm->arch.memslots_mmu_write_tracking = true;
+	kvm_mmu_init_tdp_mmu(kvm);
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index bb5d60bd4dbf..cc4eb5b7fb76 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -19,14 +19,10 @@
 #include "mmu.h"
 #include "mmu_internal.h"
 
-static bool write_tracking_enabled(struct kvm *kvm)
+bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 {
-	/*
-	 * Read memslots_mmu_write_tracking before gfn_track pointers. Pairs
-	 * with smp_store_release in kvm_page_track_enable_mmu_write_tracking.
-	 */
 	return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
-	       smp_load_acquire(&kvm->arch.memslots_mmu_write_tracking);
+	       !tdp_enabled || kvm_shadow_root_allocated(kvm);
 }
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
@@ -46,7 +42,8 @@ int kvm_page_track_create_memslot(struct kvm *kvm,
 	int i;
 
 	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-		if (i == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(kvm))
+		if (i == KVM_PAGE_TRACK_WRITE &&
+		    !kvm_page_track_write_tracking_enabled(kvm))
 			continue;
 
 		slot->arch.gfn_track[i] =
@@ -71,43 +68,18 @@ static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
 	return true;
 }
 
-int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm)
+int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
 {
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *slot;
-	unsigned short **gfn_track;
-	int i;
+	unsigned short *gfn_track;
 
-	if (write_tracking_enabled(kvm))
+	if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
 		return 0;
 
-	mutex_lock(&kvm->slots_arch_lock);
-
-	if (write_tracking_enabled(kvm)) {
-		mutex_unlock(&kvm->slots_arch_lock);
-		return 0;
-	}
-
-	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		slots = __kvm_memslots(kvm, i);
-		kvm_for_each_memslot(slot, slots) {
-			gfn_track = slot->arch.gfn_track + KVM_PAGE_TRACK_WRITE;
-			*gfn_track = kvcalloc(slot->npages, sizeof(*gfn_track),
-					      GFP_KERNEL_ACCOUNT);
-			if (*gfn_track == NULL) {
-				mutex_unlock(&kvm->slots_arch_lock);
-				return -ENOMEM;
-			}
-		}
-	}
-
-	/*
-	 * Ensure that memslots_mmu_write_tracking becomes true strictly
-	 * after all the pointers are set.
-	 */
-	smp_store_release(&kvm->arch.memslots_mmu_write_tracking, true);
-	mutex_unlock(&kvm->slots_arch_lock);
+	gfn_track = kvcalloc(slot->npages, sizeof(*gfn_track), GFP_KERNEL_ACCOUNT);
+	if (gfn_track == NULL)
+		return -ENOMEM;
 
+	slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
 	return 0;
 }
 
@@ -147,7 +119,7 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 		return;
 
 	if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-		    !write_tracking_enabled(kvm)))
+		    !kvm_page_track_write_tracking_enabled(kvm)))
 		return;
 
 	update_gfn_track(slot, gfn, mode, 1);
@@ -185,7 +157,7 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
 		return;
 
 	if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-		    !write_tracking_enabled(kvm)))
+		    !kvm_page_track_write_tracking_enabled(kvm)))
 		return;
 
 	update_gfn_track(slot, gfn, mode, -1);
@@ -213,7 +185,8 @@ bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
 	if (!slot)
 		return false;
 
-	if (mode == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(vcpu->kvm))
+	if (mode == KVM_PAGE_TRACK_WRITE &&
+	    !kvm_page_track_write_tracking_enabled(vcpu->kvm))
 		return false;
 
 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index ceaf7ff3ca7c..476b133544dd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -90,7 +90,6 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
 #ifdef CONFIG_X86_64
 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
-static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 
 static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
@@ -112,7 +111,6 @@ static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
 #else
 static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
-static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
 static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; }
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index afdc5d186c50..ac6d31ec909f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11514,8 +11514,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	kvm_page_track_free_memslot(slot);
 }
 
-static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
-			      unsigned long npages)
+int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
 {
 	const int sz = sizeof(*slot->arch.rmap[0]);
 	int i;
@@ -11537,50 +11536,6 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
-int alloc_all_memslots_rmaps(struct kvm *kvm)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *slot;
-	int r, i;
-
-	/*
-	 * Check if memslots alreday have rmaps early before acquiring
-	 * the slots_arch_lock below.
-	 */
-	if (kvm_memslots_have_rmaps(kvm))
-		return 0;
-
-	mutex_lock(&kvm->slots_arch_lock);
-
-	/*
-	 * Read memslots_have_rmaps again, under the slots arch lock,
-	 * before allocating the rmaps
-	 */
-	if (kvm_memslots_have_rmaps(kvm)) {
-		mutex_unlock(&kvm->slots_arch_lock);
-		return 0;
-	}
-
-	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		slots = __kvm_memslots(kvm, i);
-		kvm_for_each_memslot(slot, slots) {
-			r = memslot_rmap_alloc(slot, slot->npages);
-			if (r) {
-				mutex_unlock(&kvm->slots_arch_lock);
-				return r;
-			}
-		}
-	}
-
-	/*
-	 * Ensure that memslots_have_rmaps becomes true strictly after
-	 * all the rmap pointers are set.
-	 */
-	smp_store_release(&kvm->arch.memslots_have_rmaps, true);
-	mutex_unlock(&kvm->slots_arch_lock);
-	return 0;
-}
-
 static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
-- 
2.11.0
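
Editor's note (not part of the patch): the new mmu_first_shadow_root_alloc() follows a
check / lock / recheck shape and publishes the result with smp_store_release(), which
pairs with the smp_load_acquire() in kvm_shadow_root_allocated(). The sketch below is a
minimal userspace analogue of that publication pattern, using C11 atomics and a pthread
mutex in place of the kernel's smp_store_release()/smp_load_acquire() and
slots_arch_lock. Every name in it (struct slot_metadata, metadata_ready,
publish_metadata_once) is hypothetical and exists only to illustrate the ordering; it
does not mirror KVM's actual data structures.

/*
 * Illustrative sketch only (assumed userspace analogue, not KVM code):
 * lazily allocate per-slot metadata under a lock and publish completion
 * with a release store so lock-free readers see the pointers.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct slot_metadata {
	unsigned short *gfn_track;	/* lazily allocated per-slot array */
	unsigned long npages;
};

static struct slot_metadata slots[16];
static pthread_mutex_t slots_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool metadata_published;	/* stands in for shadow_root_allocated */

/* Acquire load: once this returns true, the pointers written before the
 * release store in publish_metadata_once() are guaranteed to be visible. */
static bool metadata_ready(void)
{
	return atomic_load_explicit(&metadata_published, memory_order_acquire);
}

/* Check, lock, recheck, allocate, then publish, the same shape as
 * mmu_first_shadow_root_alloc() in the patch above. */
static int publish_metadata_once(void)
{
	size_t i;
	int r = 0;

	if (metadata_ready())
		return 0;

	pthread_mutex_lock(&slots_lock);
	if (metadata_ready())
		goto out_unlock;

	for (i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
		if (slots[i].gfn_track || !slots[i].npages)
			continue;	/* no-op if already allocated or empty */
		slots[i].gfn_track = calloc(slots[i].npages,
					    sizeof(*slots[i].gfn_track));
		if (!slots[i].gfn_track) {
			/* keep partial allocations, as the patch does */
			r = -1;
			goto out_unlock;
		}
	}

	/* Release store: the flag becomes true strictly after all pointers are set. */
	atomic_store_explicit(&metadata_published, true, memory_order_release);

out_unlock:
	pthread_mutex_unlock(&slots_lock);
	return r;
}

int main(void)
{
	slots[0].npages = 64;	/* pretend one slot needs metadata */
	if (publish_metadata_once() || !metadata_ready())
		return 1;
	return 0;
}

In the patch itself, this acquire/release pairing is what allows kvm_memslots_have_rmaps()
and kvm_page_track_write_tracking_enabled() to be checked without holding slots_arch_lock.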