KVM: arm64: Convert unmap_stage2_range() to generic page-table API
author	Will Deacon <will@kernel.org>
	Fri, 11 Sep 2020 13:25:17 +0000 (14:25 +0100)
committer	Marc Zyngier <maz@kernel.org>
	Fri, 11 Sep 2020 14:51:13 +0000 (15:51 +0100)
Convert unmap_stage2_range() to use kvm_pgtable_stage2_unmap() instead
of walking the page-table directly.
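
For reference, the generic API performs the table walk internally; a
minimal sketch of the call shape, assuming the declaration added to
asm/kvm_pgtable.h earlier in this series:

	int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

	/* Unmap [addr, addr + size) from the guest stage-2 page-table. */
	ret = kvm_pgtable_stage2_unmap(mmu->pgt, addr, size);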

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20200911132529.19844-10-will@kernel.org
arch/arm64/kvm/mmu.c

index d8cfdd2..f03d3d0 100644
@@ -39,6 +39,38 @@ static bool is_iomap(unsigned long flags)
        return flags & KVM_S2PTE_FLAG_IS_IOMAP;
 }
 
+/*
+ * Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
+ * we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
+ * CONFIG_LOCKUP_DETECTOR, CONFIG_LOCKDEP. Additionally, holding the lock too
+ * long will also starve other vCPUs. We also have to make sure that the
+ * page tables are not freed while the lock is released.
+ */
+static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
+                             phys_addr_t end,
+                             int (*fn)(struct kvm_pgtable *, u64, u64),
+                             bool resched)
+{
+       int ret;
+       u64 next;
+
+       do {
+               struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+               if (!pgt)
+                       return -EINVAL;
+
+               next = stage2_pgd_addr_end(kvm, addr, end);
+               ret = fn(pgt, addr, next - addr);
+               if (ret)
+                       break;
+
+               if (resched && next != end)
+                       cond_resched_lock(&kvm->mmu_lock);
+       } while (addr = next, addr != end);
+
+       return ret;
+}
+
 static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 {
        return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
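
stage2_apply_range() factors the resched-friendly walk out of each caller:
it processes one top-level entry at a time, re-reads mmu.pgt on every
iteration in case a concurrent teardown freed the tables while the lock was
dropped, and breaks the lock between entries via cond_resched_lock(). The
callback signature matches the kvm_pgtable_stage2_*() helpers, so the same
loop can plausibly serve other range operations as well; a sketch, assuming
kvm_pgtable_stage2_flush() from the same header:

	/* Hypothetical reuse: cache maintenance over a range, may resched. */
	ret = stage2_apply_range(kvm, addr, end, kvm_pgtable_stage2_flush, true);
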
@@ -220,8 +252,8 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
  * end up writing old data to disk.
  *
  * This is why right after unmapping a page/section and invalidating
- * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
- * the IO subsystem will never hit in the cache.
+ * the corresponding TLBs, we flush to make sure the IO subsystem will
+ * never hit in the cache.
  *
  * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
  * we then fully enforce cacheability of RAM, no matter what the guest
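
With the generic page-table code, that flush is no longer open-coded in
mmu.c: the unmap walker performs the cache maintenance itself when it tears
down a cacheable mapping. A minimal sketch of the guard, assuming the
standard arm64 capability check (the real helpers live in this series'
pgtable.c, not in this hunk):

	/* No CMOs needed when FWB forces stage-2 memory to be write-back. */
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		__flush_dcache_area(va, size);
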
@@ -344,32 +376,12 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
                                 bool may_block)
 {
        struct kvm *kvm = mmu->kvm;
-       pgd_t *pgd;
-       phys_addr_t addr = start, end = start + size;
-       phys_addr_t next;
+       phys_addr_t end = start + size;
 
        assert_spin_locked(&kvm->mmu_lock);
        WARN_ON(size & ~PAGE_MASK);
-
-       pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
-       do {
-               /*
-                * Make sure the page table is still active, as another thread
-                * could have possibly freed the page table, while we released
-                * the lock.
-                */
-               if (!READ_ONCE(mmu->pgd))
-                       break;
-               next = stage2_pgd_addr_end(kvm, addr, end);
-               if (!stage2_pgd_none(kvm, *pgd))
-                       unmap_stage2_p4ds(mmu, pgd, addr, next);
-               /*
-                * If the range is too large, release the kvm->mmu_lock
-                * to prevent starvation and lockup detector warnings.
-                */
-               if (may_block && next != end)
-                       cond_resched_lock(&kvm->mmu_lock);
-       } while (pgd++, addr = next, addr != end);
+       WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
+                                  may_block));
 }
 
 static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
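
Callers still enter with kvm->mmu_lock held; when may_block is set,
stage2_apply_range() may drop and retake the lock between top-level
entries. A usage sketch, assuming a caller shaped like the existing
MMU-notifier paths (gpa and size are placeholders):

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(&kvm->arch.mmu, gpa, size);	/* may_block */
	spin_unlock(&kvm->mmu_lock);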