Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sun, 31 Oct 2021 18:19:02 +0000 (11:19 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sun, 31 Oct 2021 18:19:02 +0000 (11:19 -0700)
Pull kvm fixes from Paolo Bonzini:

 - Fixes for s390 interrupt delivery

 - Fixes for Xen emulator bugs showing up as debug kernel WARNs

 - Fix another issue with SEV/ES string I/O VMGEXITs

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Take srcu lock in post_kvm_run_save()
  KVM: SEV-ES: fix another issue with string I/O VMGEXITs
  KVM: x86/xen: Fix kvm_xen_has_interrupt() sleeping in kvm_vcpu_block()
  KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock
  KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
  KVM: s390: clear kicked_mask before sleeping again

arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/sev.c
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1072245..2245f4b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
        int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
        struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
        struct kvm_vcpu *vcpu;
+       u8 vcpu_isc_mask;
 
        for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
                vcpu = kvm_get_vcpu(kvm, vcpu_idx);
                if (psw_ioint_disabled(vcpu))
                        continue;
-               deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
-               if (deliverable_mask) {
+               vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+               if (deliverable_mask & vcpu_isc_mask) {
                        /* lately kicked but not yet running */
                        if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
                                return;
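
The change above fixes a clobbered mask: the old code narrowed the function-wide deliverable_mask inside the loop ("deliverable_mask &= ..."), so after the first idle vCPU had been considered, every later vCPU was tested against the already-reduced mask and a deliverable I/O interrupt could end up kicking nobody. The fix keeps deliverable_mask intact and only tests the per-vCPU intersection. A minimal userspace sketch of the same pattern, with made-up names (pick_consumer() and the masks are illustrative, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: find the first consumer whose own mask overlaps the
 * pending mask, without clobbering the pending mask between iterations. */
static int pick_consumer(uint8_t pending, const uint8_t *consumer_mask, int n)
{
        for (int i = 0; i < n; i++) {
                uint8_t isc_mask = consumer_mask[i];    /* per-consumer copy */

                if (pending & isc_mask)                 /* test, don't assign */
                        return i;
        }
        return -1;
}

int main(void)
{
        uint8_t masks[] = { 0x01, 0x08, 0x04 };

        /* 0x04 overlaps only consumer 2.  A buggy "pending &= mask" variant
         * would zero pending at consumer 0 and report that nobody can take
         * the interrupt. */
        printf("consumer %d\n", pick_consumer(0x04, masks, 3));
        return 0;
}
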
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6a6dd5e..1c97493 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3363,6 +3363,7 @@ out_free_sie_block:
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
+       clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
        return kvm_s390_vcpu_has_irq(vcpu, 0);
 }
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5a0298a..13f6465 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1098,7 +1098,7 @@ struct kvm_arch {
        u64 cur_tsc_generation;
        int nr_vcpus_matched_tsc;
 
-       spinlock_t pvclock_gtod_sync_lock;
+       raw_spinlock_t pvclock_gtod_sync_lock;
        bool use_master_clock;
        u64 master_kernel_ns;
        u64 master_cycle_now;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 2e4916b..7e34d71 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2591,11 +2591,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 
 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 {
-       if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+       int count;
+       int bytes;
+
+       if (svm->vmcb->control.exit_info_2 > INT_MAX)
+               return -EINVAL;
+
+       count = svm->vmcb->control.exit_info_2;
+       if (unlikely(check_mul_overflow(count, size, &bytes)))
+               return -EINVAL;
+
+       if (!setup_vmgexit_scratch(svm, in, bytes))
                return -EINVAL;
 
-       return kvm_sev_es_string_io(&svm->vcpu, size, port,
-                                   svm->ghcb_sa, svm->ghcb_sa_len / size, in);
+       return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
 }
 
 void sev_es_init_vmcb(struct vcpu_svm *svm)
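
In the hunk above, the repeat count for the string I/O comes from exit_info_2, which the guest supplies through the GHCB, so the fix bounds it to INT_MAX and checks the count * size product with check_mul_overflow() before using it to size the scratch buffer. A small userspace sketch of that validation, assuming the kernel's check_mul_overflow() behaves like the compiler builtin used below (the helper name scratch_bytes() is made up):

#include <limits.h>
#include <stdio.h>

/* Illustrative: bound an untrusted 64-bit repeat count and reject a
 * count * size product that does not fit in an int.  The kernel's
 * check_mul_overflow() wraps the same __builtin_mul_overflow(). */
static int scratch_bytes(unsigned long long exit_info_2, int size, int *bytes)
{
        int count;

        if (exit_info_2 > INT_MAX)
                return -1;                      /* reject absurd repeat counts */

        count = (int)exit_info_2;
        if (__builtin_mul_overflow(count, size, bytes))
                return -1;                      /* count * size overflows */

        return count;
}

int main(void)
{
        int bytes;

        printf("%d\n", scratch_bytes(4, 2, &bytes));            /* 4 (bytes = 8) */
        printf("%d\n", scratch_bytes(1ULL << 33, 2, &bytes));   /* -1: count too large */
        printf("%d\n", scratch_bytes(INT_MAX, 4, &bytes));      /* -1: product overflows */
        return 0;
}
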
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b26647a..bfe0de3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        kvm_vcpu_write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-       spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
        if (!matched) {
                kvm->arch.nr_vcpus_matched_tsc = 0;
        } else if (!already_matched) {
@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        }
 
        kvm_track_tsc_matching(vcpu);
-       spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        kvm_make_mclock_inprogress_request(kvm);
 
        /* no guest entries from this point */
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        pvclock_update_vm_gtod_copy(kvm);
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
        unsigned long flags;
        u64 ret;
 
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        if (!ka->use_master_clock) {
-               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                return get_kvmclock_base_ns() + ka->kvmclock_offset;
        }
 
        hv_clock.tsc_timestamp = ka->master_cycle_now;
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        use_master_clock = ka->use_master_clock;
        if (use_master_clock) {
                host_tsc = ka->master_cycle_now;
                kernel_ns = ka->master_kernel_ns;
        }
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@ -6100,13 +6100,13 @@ set_pit2_out:
                 * is slightly ahead) here we risk going negative on unsigned
                 * 'system_time' when 'user_ns.clock' is very small.
                 */
-               spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+               raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
                if (kvm->arch.use_master_clock)
                        now_ns = ka->master_kernel_ns;
                else
                        now_ns = get_kvmclock_base_ns();
                ka->kvmclock_offset = user_ns.clock - now_ns;
-               spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+               raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
 
                kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
@@ -8156,9 +8156,9 @@ static void kvm_hyperv_tsc_notifier(void)
        list_for_each_entry(kvm, &vm_list, vm_list) {
                struct kvm_arch *ka = &kvm->arch;
 
-               spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                pvclock_update_vm_gtod_copy(kvm);
-               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -8800,9 +8800,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
+
+       /*
+        * The call to kvm_ready_for_interrupt_injection() may end up in
+        * kvm_xen_has_interrupt() which may require the srcu lock to be
+        * held, to protect against changes in the vcpu_info address.
+        */
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        kvm_run->ready_for_interrupt_injection =
                pic_in_kernel(vcpu->kvm) ||
                kvm_vcpu_ready_for_interrupt_injection(vcpu);
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
        if (is_smm(vcpu))
                kvm_run->flags |= KVM_RUN_X86_SMM;
@@ -11199,7 +11207,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        raw_spin_lock_init(&kvm->arch.tsc_write_lock);
        mutex_init(&kvm->arch.apic_map_lock);
-       spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+       raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
        kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
        pvclock_update_vm_gtod_copy(kvm);
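
The comment added in the post_kvm_run_save() hunk above carries the reasoning for the srcu change: kvm_vcpu_ready_for_interrupt_injection() can now reach kvm_xen_has_interrupt(), which needs kvm->srcu held to protect against changes in the vcpu_info address. The KVM read-side convention is that srcu_read_lock() returns an index which must be passed back to the matching srcu_read_unlock(). A sketch of that pattern in kernel style (my_peek_ready() and check_something() are hypothetical, not KVM APIs):

#include <linux/kvm_host.h>
#include <linux/srcu.h>

/* Hypothetical predicate that may dereference memslot-backed state. */
bool check_something(struct kvm_vcpu *vcpu);

/*
 * Sketch of the SRCU read-side pattern: srcu_read_lock() returns an index
 * that must be handed back to srcu_read_unlock().  Holding kvm->srcu keeps
 * memslot-derived state (here, the Xen vcpu_info location) stable while it
 * is inspected.
 */
static bool my_peek_ready(struct kvm_vcpu *vcpu)
{
        bool ready;
        int idx;

        idx = srcu_read_lock(&vcpu->kvm->srcu);
        ready = check_something(vcpu);          /* safe to look at memslots here */
        srcu_read_unlock(&vcpu->kvm->srcu, idx);

        return ready;
}
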
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9ea9c3d..8f62bae 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -190,6 +190,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
+       int err;
        u8 rc = 0;
 
        /*
@@ -216,13 +217,29 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
        if (likely(slots->generation == ghc->generation &&
                   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
                /* Fast path */
-               __get_user(rc, (u8 __user *)ghc->hva + offset);
-       } else {
-               /* Slow path */
-               kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
-                                            sizeof(rc));
+               pagefault_disable();
+               err = __get_user(rc, (u8 __user *)ghc->hva + offset);
+               pagefault_enable();
+               if (!err)
+                       return rc;
        }
 
+       /* Slow path */
+
+       /*
+        * This function gets called from kvm_vcpu_block() after setting the
+        * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+        * from a HLT. So we really mustn't sleep. If the page ended up absent
+        * at that point, just return 1 in order to trigger an immediate wake,
+        * and we'll end up getting called again from a context where we *can*
+        * fault in the page and wait for it.
+        */
+       if (in_atomic() || !task_is_running(current))
+               return 1;
+
+       kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+                                    sizeof(rc));
+
        return rc;
 }