
KVM: MMU: change kvm_mmu_hugepage_adjust() arguments to kvm_page_fault
author Paolo Bonzini <pbonzini@redhat.com>
Sat, 7 Aug 2021 13:21:53 +0000 (09:21 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 1 Oct 2021 07:44:51 +0000 (03:44 -0400)
Pass struct kvm_page_fault to kvm_mmu_hugepage_adjust() instead of
extracting the individual arguments from the struct; the results are also
stored in the struct, so the callers are adjusted accordingly.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/mmu/tdp_mmu.c
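
In short, the change collapses kvm_mmu_hugepage_adjust()'s argument list into
the fault structure. A condensed sketch of the interface change, assembled from
the hunks below (field comments abbreviated):

    /* Before: callers extract values from the fault, pass them piecemeal,
     * and copy the results back into locals. */
    int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
                                int max_level, kvm_pfn_t *pfnp,
                                bool huge_page_disallowed, int *req_level);

    /* After: the fault carries both the inputs and the results. */
    void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu,
                                 struct kvm_page_fault *fault);

    /* Fields of struct kvm_page_fault involved in the adjustment: */
    const bool nx_huge_page_workaround_enabled; /* global NX workaround state  */
    bool huge_page_disallowed; /* >4K mapping forbidden due to NX hugepages    */
    u8 max_level;              /* input: largest page size allowed             */
    u8 req_level;              /* derived from max_level and the host mapping  */
    u8 goal_level;             /* page size that will actually be created      */

Callers such as __direct_map(), FNAME(fetch) and kvm_tdp_mmu_map() then read
fault->req_level and fault->goal_level instead of keeping local copies.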

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6697571..01a4d1b 100644
@@ -127,12 +127,34 @@ struct kvm_page_fault {
        const bool rsvd;
        const bool user;
 
-       /* Derived from mmu.  */
+       /* Derived from mmu and global state.  */
        const bool is_tdp;
+       const bool nx_huge_page_workaround_enabled;
 
-       /* Input to FNAME(fetch), __direct_map and kvm_tdp_mmu_map.  */
+       /*
+        * Whether a >4KB mapping can be created or is forbidden due to NX
+        * hugepages.
+        */
+       bool huge_page_disallowed;
+
+       /*
+        * Maximum page size that can be created for this fault; input to
+        * FNAME(fetch), __direct_map and kvm_tdp_mmu_map.
+        */
        u8 max_level;
 
+       /*
+        * Page size that can be created based on the max_level and the
+        * page size used by the host mapping.
+        */
+       u8 req_level;
+
+       /*
+        * Page size that will be created based on the req_level and
+        * huge_page_disallowed.
+        */
+       u8 goal_level;
+
        /* Shifted addr, or result of guest page table walk if addr is a gva.  */
        gfn_t gfn;
 
@@ -144,6 +166,12 @@ struct kvm_page_fault {
 
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 
+extern int nx_huge_pages;
+static inline bool is_nx_huge_page_enabled(void)
+{
+       return READ_ONCE(nx_huge_pages);
+}
+
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                                        u32 err, bool prefault)
 {
@@ -157,8 +185,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                .user = err & PFERR_USER_MASK,
                .prefault = prefault,
                .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
+               .nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(),
 
                .max_level = KVM_MAX_HUGEPAGE_LEVEL,
+               .req_level = PG_LEVEL_4K,
+               .goal_level = PG_LEVEL_4K,
        };
 #ifdef CONFIG_RETPOLINE
        if (fault.is_tdp)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 36cbe5c..877d0bd 100644
@@ -2920,48 +2920,45 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
        return min(host_level, max_level);
 }
 
-int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
-                           int max_level, kvm_pfn_t *pfnp,
-                           bool huge_page_disallowed, int *req_level)
+void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
        struct kvm_memory_slot *slot;
-       kvm_pfn_t pfn = *pfnp;
        kvm_pfn_t mask;
-       int level;
 
-       *req_level = PG_LEVEL_4K;
+       fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled;
 
-       if (unlikely(max_level == PG_LEVEL_4K))
-               return PG_LEVEL_4K;
+       if (unlikely(fault->max_level == PG_LEVEL_4K))
+               return;
 
-       if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn))
-               return PG_LEVEL_4K;
+       if (is_error_noslot_pfn(fault->pfn) || kvm_is_reserved_pfn(fault->pfn))
+               return;
 
-       slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true);
+       slot = gfn_to_memslot_dirty_bitmap(vcpu, fault->gfn, true);
        if (!slot)
-               return PG_LEVEL_4K;
+               return;
 
        /*
         * Enforce the iTLB multihit workaround after capturing the requested
         * level, which will be used to do precise, accurate accounting.
         */
-       *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
-       if (level == PG_LEVEL_4K || huge_page_disallowed)
-               return PG_LEVEL_4K;
+       fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
+                                                    fault->gfn, fault->pfn,
+                                                    fault->max_level);
+       if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
+               return;
 
        /*
         * mmu_notifier_retry() was successful and mmu_lock is held, so
         * the pmd can't be split from under us.
         */
-       mask = KVM_PAGES_PER_HPAGE(level) - 1;
-       VM_BUG_ON((gfn & mask) != (pfn & mask));
-       *pfnp = pfn & ~mask;
-
-       return level;
+       fault->goal_level = fault->req_level;
+       mask = KVM_PAGES_PER_HPAGE(fault->goal_level) - 1;
+       VM_BUG_ON((fault->gfn & mask) != (fault->pfn & mask));
+       fault->pfn &= ~mask;
 }
 
 void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
-                               kvm_pfn_t *pfnp, int *goal_levelp)
+                               kvm_pfn_t *pfnp, u8 *goal_levelp)
 {
        int level = *goal_levelp;
 
@@ -2984,28 +2981,25 @@ void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
 
 static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
-       bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
-       bool huge_page_disallowed = fault->exec && nx_huge_page_workaround_enabled;
        struct kvm_shadow_walk_iterator it;
        struct kvm_mmu_page *sp;
-       int level, req_level, ret;
+       int ret;
        gfn_t base_gfn = fault->gfn;
 
-       level = kvm_mmu_hugepage_adjust(vcpu, fault->gfn, fault->max_level, &fault->pfn,
-                                       huge_page_disallowed, &req_level);
+       kvm_mmu_hugepage_adjust(vcpu, fault);
 
-       trace_kvm_mmu_spte_requested(fault->addr, level, fault->pfn);
+       trace_kvm_mmu_spte_requested(fault->addr, fault->goal_level, fault->pfn);
        for_each_shadow_entry(vcpu, fault->addr, it) {
                /*
                 * We cannot overwrite existing page tables with an NX
                 * large page, as the leaf could be executable.
                 */
-               if (nx_huge_page_workaround_enabled)
+               if (fault->nx_huge_page_workaround_enabled)
                        disallowed_hugepage_adjust(*it.sptep, fault->gfn, it.level,
-                                                  &fault->pfn, &level);
+                                                  &fault->pfn, &fault->goal_level);
 
                base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-               if (it.level == level)
+               if (it.level == fault->goal_level)
                        break;
 
                drop_large_spte(vcpu, it.sptep);
@@ -3016,13 +3010,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
                                      it.level - 1, true, ACC_ALL);
 
                link_shadow_page(vcpu, it.sptep, sp);
-               if (fault->is_tdp && huge_page_disallowed &&
-                   req_level >= it.level)
+               if (fault->is_tdp && fault->huge_page_disallowed &&
+                   fault->req_level >= it.level)
                        account_huge_nx_page(vcpu->kvm, sp);
        }
 
        ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
-                          fault->write, level, base_gfn, fault->pfn,
+                          fault->write, fault->goal_level, base_gfn, fault->pfn,
                           fault->prefault, fault->map_writable);
        if (ret == RET_PF_SPURIOUS)
                return ret;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 2ba12ef..ae0c7bc 100644
@@ -118,12 +118,6 @@ static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
               kvm_x86_ops.cpu_dirty_log_size;
 }
 
-extern int nx_huge_pages;
-static inline bool is_nx_huge_page_enabled(void)
-{
-       return READ_ONCE(nx_huge_pages);
-}
-
 int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync,
                            bool speculative);
 
@@ -164,11 +158,9 @@ enum {
 int kvm_mmu_max_mapping_level(struct kvm *kvm,
                              const struct kvm_memory_slot *slot, gfn_t gfn,
                              kvm_pfn_t pfn, int max_level);
-int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
-                           int max_level, kvm_pfn_t *pfnp,
-                           bool huge_page_disallowed, int *req_level);
+void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
-                               kvm_pfn_t *pfnp, int *goal_levelp);
+                               kvm_pfn_t *pfnp, u8 *goal_levelp);
 
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index afd2ad8..20f6169 100644
@@ -658,12 +658,10 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                         struct guest_walker *gw)
 {
-       bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
-       bool huge_page_disallowed = fault->exec && nx_huge_page_workaround_enabled;
        struct kvm_mmu_page *sp = NULL;
        struct kvm_shadow_walk_iterator it;
        unsigned int direct_access, access;
-       int top_level, level, req_level, ret;
+       int top_level, ret;
        gfn_t base_gfn = fault->gfn;
 
        WARN_ON_ONCE(gw->gfn != base_gfn);
@@ -730,8 +728,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                        link_shadow_page(vcpu, it.sptep, sp);
        }
 
-       level = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, fault->max_level, &fault->pfn,
-                                       huge_page_disallowed, &req_level);
+       kvm_mmu_hugepage_adjust(vcpu, fault);
 
        trace_kvm_mmu_spte_requested(fault->addr, gw->level, fault->pfn);
 
@@ -742,12 +739,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                 * We cannot overwrite existing page tables with an NX
                 * large page, as the leaf could be executable.
                 */
-               if (nx_huge_page_workaround_enabled)
+               if (fault->nx_huge_page_workaround_enabled)
                        disallowed_hugepage_adjust(*it.sptep, fault->gfn, it.level,
-                                                  &fault->pfn, &level);
+                                                  &fault->pfn, &fault->goal_level);
 
                base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-               if (it.level == level)
+               if (it.level == fault->goal_level)
                        break;
 
                validate_direct_spte(vcpu, it.sptep, direct_access);
@@ -758,7 +755,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                        sp = kvm_mmu_get_page(vcpu, base_gfn, fault->addr,
                                              it.level - 1, true, direct_access);
                        link_shadow_page(vcpu, it.sptep, sp);
-                       if (huge_page_disallowed && req_level >= it.level)
+                       if (fault->huge_page_disallowed &&
+                           fault->req_level >= it.level)
                                account_huge_nx_page(vcpu->kvm, sp);
                }
        }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cfba8c..b48256b 100644
@@ -986,30 +986,25 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
  */
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
-       bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
-       bool huge_page_disallowed = fault->exec && nx_huge_page_workaround_enabled;
        struct kvm_mmu *mmu = vcpu->arch.mmu;
        struct tdp_iter iter;
        struct kvm_mmu_page *sp;
        u64 *child_pt;
        u64 new_spte;
        int ret;
-       int level;
-       int req_level;
 
-       level = kvm_mmu_hugepage_adjust(vcpu, fault->gfn, fault->max_level, &fault->pfn,
-                                       huge_page_disallowed, &req_level);
+       kvm_mmu_hugepage_adjust(vcpu, fault);
 
-       trace_kvm_mmu_spte_requested(fault->addr, level, fault->pfn);
+       trace_kvm_mmu_spte_requested(fault->addr, fault->goal_level, fault->pfn);
 
        rcu_read_lock();
 
        tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
-               if (nx_huge_page_workaround_enabled)
+               if (fault->nx_huge_page_workaround_enabled)
                        disallowed_hugepage_adjust(iter.old_spte, fault->gfn,
-                                                  iter.level, &fault->pfn, &level);
+                                                  iter.level, &fault->pfn, &fault->goal_level);
 
-               if (iter.level == level)
+               if (iter.level == fault->goal_level)
                        break;
 
                /*
@@ -1047,8 +1042,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
                        if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) {
                                tdp_mmu_link_page(vcpu->kvm, sp,
-                                                 huge_page_disallowed &&
-                                                 req_level >= iter.level);
+                                                 fault->huge_page_disallowed &&
+                                                 fault->req_level >= iter.level);
 
                                trace_kvm_mmu_get_page(sp, true);
                        } else {
@@ -1058,7 +1053,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
                }
        }
 
-       if (iter.level != level) {
+       if (iter.level != fault->goal_level) {
                rcu_read_unlock();
                return RET_PF_RETRY;
        }
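
Seen side by side, __direct_map(), FNAME(fetch) and kvm_tdp_mmu_map() now share
the same shape. The composite below is a simplified illustration stitched from
the hunks above, not code from any one path; spte, level and sp stand for the
per-path iterator state:

    kvm_mmu_hugepage_adjust(vcpu, fault);
    trace_kvm_mmu_spte_requested(fault->addr, fault->goal_level, fault->pfn);

    /* ... while walking down the paging structure one level at a time ... */

    /* shrink fault->goal_level if an NX-workaround page table is in the way */
    if (fault->nx_huge_page_workaround_enabled)
            disallowed_hugepage_adjust(spte, fault->gfn, level,
                                       &fault->pfn, &fault->goal_level);

    /* stop descending once the goal level is reached */
    if (level == fault->goal_level)
            break;

    /* when a new page table is linked, account it against the NX hugepage
     * workaround if that is what prevented the larger mapping
     * (__direct_map additionally checks fault->is_tdp) */
    if (fault->huge_page_disallowed && fault->req_level >= level)
            account_huge_nx_page(vcpu->kvm, sp);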