From 24ae4cfaaaa22a4f293acd0c7d97804454b7e9fb Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 1 Apr 2021 16:37:34 -0700 Subject: [PATCH] KVM: x86/mmu: Allow enabling/disabling dirty logging under MMU read lock To reduce lock contention and interference with page fault handlers, allow the TDP MMU functions which enable and disable dirty logging to operate under the MMU read lock. Signed-off-by: Ben Gardon Message-Id: <20210401233736.638171-12-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 +++++++++--- arch/x86/kvm/mmu/tdp_mmu.c | 62 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 2390e8a8fff1..61c0d1091fc5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5532,10 +5532,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, write_lock(&kvm->mmu_lock); flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, start_level, KVM_MAX_HUGEPAGE_LEVEL, false); - if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); write_unlock(&kvm->mmu_lock); + if (is_tdp_mmu_enabled(kvm)) { + read_lock(&kvm->mmu_lock); + flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); + read_unlock(&kvm->mmu_lock); + } + /* * We can flush all the TLBs out of the mmu lock without TLB * corruption since we just change the spte from writable to @@ -5638,10 +5642,14 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, write_lock(&kvm->mmu_lock); flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false); - if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); write_unlock(&kvm->mmu_lock); + if (is_tdp_mmu_enabled(kvm)) { + read_lock(&kvm->mmu_lock); + flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); + read_unlock(&kvm->mmu_lock); + } + /* * It's also safe to flush TLBs out of mmu lock here as currently this * function is only used for dirty logging, in which case flushing TLB diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6a3b7ba4aa3c..335c126d9860 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -495,8 +495,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, } /* - * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the - * associated bookkeeping + * tdp_mmu_set_spte_atomic_no_dirty_log - Set a TDP MMU SPTE atomically + * and handle the associated bookkeeping, but do not mark the page dirty + * in KVM's dirty bitmaps. * * @kvm: kvm instance * @iter: a tdp_iter instance currently on the SPTE that should be set @@ -504,9 +505,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, * Returns: true if the SPTE was set, false if it was not. If false is returned, * this function will have no side-effects. */ -static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, - struct tdp_iter *iter, - u64 new_spte) +static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm, + struct tdp_iter *iter, + u64 new_spte) { lockdep_assert_held_read(&kvm->mmu_lock); @@ -521,12 +522,25 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, new_spte) != iter->old_spte) return false; - handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, - new_spte, iter->level, true); + __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, + new_spte, iter->level, true); + handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level); return true; } +static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, + struct tdp_iter *iter, + u64 new_spte) +{ + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte)) + return false; + + handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn, + iter->old_spte, new_spte, iter->level); + return true; +} + static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, struct tdp_iter *iter) { @@ -1082,7 +1096,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, for_each_tdp_pte_min_level(iter, root->spt, root->role.level, min_level, start, end) { - if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false)) +retry: + if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; if (!is_shadow_present_pte(iter.old_spte) || @@ -1092,7 +1107,15 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, new_spte = iter.old_spte & ~PT_WRITABLE_MASK; - tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter, + new_spte)) { + /* + * The iter must explicitly re-read the SPTE because + * the atomic cmpxchg failed. + */ + iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep)); + goto retry; + } spte_set = true; } @@ -1111,7 +1134,9 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_mmu_page *root; bool spte_set = false; - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false) + lockdep_assert_held_read(&kvm->mmu_lock); + + for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); @@ -1135,7 +1160,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); tdp_root_for_each_leaf_pte(iter, root, start, end) { - if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false)) +retry: + if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; if (spte_ad_need_write_protect(iter.old_spte)) { @@ -1150,7 +1176,15 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, continue; } - tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter, + new_spte)) { + /* + * The iter must explicitly re-read the SPTE because + * the atomic cmpxchg failed. + */ + iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep)); + goto retry; + } spte_set = true; } @@ -1170,7 +1204,9 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) struct kvm_mmu_page *root; bool spte_set = false; - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false) + lockdep_assert_held_read(&kvm->mmu_lock); + + for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); -- 2.11.0