KVM: x86: fix singlestepping over syscall
[sagit-ice-cold/kernel_xiaomi_msm8998.git]
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7eb4ebd..3ffd590 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -199,7 +199,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
        struct kvm_shared_msrs *locals
                = container_of(urn, struct kvm_shared_msrs, urn);
        struct kvm_shared_msr_values *values;
+       unsigned long flags;
 
+       /*
+        * Disabling irqs at this point since the following code could be
+        * interrupted and executed through kvm_arch_hardware_disable()
+        */
+       local_irq_save(flags);
+       if (locals->registered) {
+               locals->registered = false;
+               user_return_notifier_unregister(urn);
+       }
+       local_irq_restore(flags);
        for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
                values = &locals->values[slot];
                if (values->host != values->curr) {
@@ -207,8 +218,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
                        values->curr = values->host;
                }
        }
-       locals->registered = false;
-       user_return_notifier_unregister(urn);
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
@@ -697,7 +706,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
                if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
                        return 1;
        }
-       kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
 
        if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -2736,7 +2744,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        }
 
        kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
-       vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2942,6 +2949,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
        memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
+static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
+
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                                              struct kvm_vcpu_events *events)
 {
@@ -2951,6 +2960,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                              | KVM_VCPUEVENT_VALID_SMM))
                return -EINVAL;
 
+       /* INITs are latched while in SMM */
+       if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
+           (events->smi.smm || events->smi.pending) &&
+           vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
+               return -EINVAL;
+
        process_nmi(vcpu);
        vcpu->arch.exception.pending = events->exception.injected;
        vcpu->arch.exception.nr = events->exception.nr;
@@ -2974,10 +2989,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
        if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+               u32 hflags = vcpu->arch.hflags;
                if (events->smi.smm)
-                       vcpu->arch.hflags |= HF_SMM_MASK;
+                       hflags |= HF_SMM_MASK;
                else
-                       vcpu->arch.hflags &= ~HF_SMM_MASK;
+                       hflags &= ~HF_SMM_MASK;
+               kvm_set_hflags(vcpu, hflags);
+
                vcpu->arch.smi_pending = events->smi.pending;
                if (events->smi.smm_inside_nmi)
                        vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
@@ -3015,6 +3033,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        if (dbgregs->flags)
                return -EINVAL;
 
+       if (dbgregs->dr6 & ~0xffffffffull)
+               return -EINVAL;
+       if (dbgregs->dr7 & ~0xffffffffull)
+               return -EINVAL;
+
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
@@ -3040,6 +3063,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
        memcpy(dest, xsave, XSAVE_HDR_OFFSET);
 
        /* Set XSTATE_BV */
+       xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
        *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
 
        /*
@@ -3116,11 +3140,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
        }
 }
 
+#define XSAVE_MXCSR_OFFSET 24
+
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                                        struct kvm_xsave *guest_xsave)
 {
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+       u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
        if (cpu_has_xsave) {
                /*
@@ -3128,11 +3155,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
                 * with old userspace.
                 */
-               if (xstate_bv & ~kvm_supported_xcr0())
+               if (xstate_bv & ~kvm_supported_xcr0() ||
+                       mxcsr & ~mxcsr_feature_mask)
                        return -EINVAL;
                load_xsave(vcpu, (u8 *)guest_xsave->region);
        } else {
-               if (xstate_bv & ~XFEATURE_MASK_FPSSE)
+               if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
+                       mxcsr & ~mxcsr_feature_mask)
                        return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state.fxsave,
                        guest_xsave->region, sizeof(struct fxregs_state));
@@ -3314,6 +3343,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        };
        case KVM_SET_VAPIC_ADDR: {
                struct kvm_vapic_addr va;
+               int idx;
 
                r = -EINVAL;
                if (!lapic_in_kernel(vcpu))
@@ -3321,7 +3351,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EFAULT;
                if (copy_from_user(&va, argp, sizeof va))
                        goto out;
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
        case KVM_X86_SETUP_MCE: {
@@ -4576,16 +4608,20 @@ emul_write:
 
 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
-       /* TODO: String I/O for in kernel device */
-       int r;
+       int r = 0, i;
 
-       if (vcpu->arch.pio.in)
-               r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
-                                   vcpu->arch.pio.size, pd);
-       else
-               r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
-                                    vcpu->arch.pio.port, vcpu->arch.pio.size,
-                                    pd);
+       for (i = 0; i < vcpu->arch.pio.count; i++) {
+               if (vcpu->arch.pio.in)
+                       r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
+                                           vcpu->arch.pio.size, pd);
+               else
+                       r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
+                                            vcpu->arch.pio.port, vcpu->arch.pio.size,
+                                            pd);
+               if (r)
+                       break;
+               pd += vcpu->arch.pio.size;
+       }
        return r;
 }
 
@@ -4623,6 +4659,8 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
        if (vcpu->arch.pio.count)
                goto data_avail;
 
+       memset(vcpu->arch.pio_data, 0, size * count);
+
        ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
        if (ret) {
 data_avail:
@@ -4806,6 +4844,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
 
        if (var.unusable) {
                memset(desc, 0, sizeof(*desc));
+               if (base3)
+                       *base3 = 0;
                return false;
        }
 
@@ -4961,6 +5001,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
        kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
 }
 
+static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
+{
+       return emul_to_vcpu(ctxt)->arch.hflags;
+}
+
+static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
+{
+       kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+}
+
 static const struct x86_emulate_ops emulate_ops = {
        .read_gpr            = emulator_read_gpr,
        .write_gpr           = emulator_write_gpr,
@@ -5000,6 +5050,8 @@ static const struct x86_emulate_ops emulate_ops = {
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
        .set_nmi_mask        = emulator_set_nmi_mask,
+       .get_hflags          = emulator_get_hflags,
+       .set_hflags          = emulator_set_hflags,
 };
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -5043,6 +5095,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
        ctxt->eflags = kvm_get_rflags(vcpu);
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
        ctxt->eip = kvm_rip_read(vcpu);
        ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                     (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
@@ -5052,7 +5106,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
        BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
        BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
-       ctxt->emul_flags = vcpu->arch.hflags;
 
        init_decode_cache(ctxt);
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@ -5264,37 +5317,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
        return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       /*
-        * rflags is the old, "raw" value of the flags.  The new value has
-        * not been saved yet.
-        *
-        * This is correct even for TF set by the guest, because "the
-        * processor will not generate this exception after the instruction
-        * that sets the TF flag".
-        */
-       if (unlikely(rflags & X86_EFLAGS_TF)) {
-               if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-                       kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-                                                 DR6_RTM;
-                       kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-                       kvm_run->debug.arch.exception = DB_VECTOR;
-                       kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
-               } else {
-                       vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
-                       /*
-                        * "Certain debug exceptions may clear bit 0-3.  The
-                        * remaining contents of the DR6 register are never
-                        * cleared by the processor".
-                        */
-                       vcpu->arch.dr6 &= ~15;
-                       vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
-               }
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+               kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+               kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+               kvm_run->debug.arch.exception = DB_VECTOR;
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               *r = EMULATE_USER_EXIT;
+       } else {
+               vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
+               /*
+                * "Certain debug exceptions may clear bit 0-3.  The
+                * remaining contents of the DR6 register are never
+                * cleared by the processor".
+                */
+               vcpu->arch.dr6 &= ~15;
+               vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+               kvm_queue_exception(vcpu, DB_VECTOR);
        }
 }
 
@@ -5448,11 +5490,10 @@ restart:
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
-               if (vcpu->arch.hflags != ctxt->emul_flags)
-                       kvm_set_hflags(vcpu, ctxt->emul_flags);
                kvm_rip_write(vcpu, ctxt->eip);
-               if (r == EMULATE_DONE)
-                       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+               if (r == EMULATE_DONE &&
+                   (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+                       kvm_vcpu_do_singlestep(vcpu, &r);
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
                        __kvm_set_rflags(vcpu, ctxt->eflags);
@@ -5822,6 +5863,7 @@ out:
 
 void kvm_arch_exit(void)
 {
+       kvm_lapic_exit();
        perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
 
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -5935,7 +5977,8 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 
        kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
-       return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
+       return emulator_write_emulated(ctxt, rip, instruction, 3,
+               &ctxt->exception);
 }
 
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
@@ -6495,8 +6538,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
-       kvm_load_guest_xcr0(vcpu);
-
        vcpu->mode = IN_GUEST_MODE;
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6519,6 +6560,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto cancel_injection;
        }
 
+       kvm_load_guest_xcr0(vcpu);
+
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
 
@@ -6568,6 +6611,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
 
+       kvm_put_guest_xcr0(vcpu);
+
        /* Interrupt is enabled by handle_external_intr() */
        kvm_x86_ops->handle_external_intr(vcpu);
 
@@ -6969,6 +7014,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
            mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
                return -EINVAL;
 
+       /* INITs are latched while in SMM */
+       if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+           (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
+            mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
+               return -EINVAL;
+
        if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
                vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
                set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
@@ -7215,7 +7266,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
         * and assume host would use all available bits.
         * Guest xcr0 would be loaded later.
         */
-       kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        __kernel_fpu_begin();
        __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7224,8 +7274,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       kvm_put_guest_xcr0(vcpu);
-
        if (!vcpu->guest_fpu_loaded) {
                vcpu->fpu_counter = 0;
                return;
@@ -7250,10 +7298,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+       void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
+
        kvmclock_reset(vcpu);
 
-       free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        kvm_x86_ops->vcpu_free(vcpu);
+       free_cpumask_var(wbinvd_dirty_mask);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -8199,8 +8249,7 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
                return true;
        else
-               return !kvm_event_needs_reinjection(vcpu) &&
-                       kvm_x86_ops->interrupt_allowed(vcpu);
+               return kvm_can_do_async_pf(vcpu);
 }
 
 void kvm_arch_start_assignment(struct kvm *kvm)
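
For context, a minimal userspace sketch of the single-step path this fix is about. This is not part of the patch: it assumes a vCPU fd and its mmap'ed struct kvm_run already obtained through the usual KVM_CREATE_VM/KVM_CREATE_VCPU setup (both outside this sketch), and only shows arming KVM_GUESTDBG_SINGLESTEP via KVM_SET_GUEST_DEBUG, so that with the kvm_vcpu_do_singlestep() change above an emulated SYSCALL produces a KVM_EXIT_DEBUG exit instead of losing the trap.

/*
 * Hedged sketch, not part of the patch: arm single-stepping on an
 * existing vCPU and report the debug exit that the emulator change
 * above makes reliable across emulated SYSCALL/SYSENTER.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	/* One ioctl switches the vCPU into single-step mode. */
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

static void run_one_step(int vcpu_fd, struct kvm_run *run)
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return;

	/* Each guest instruction, emulated ones included, exits here. */
	if (run->exit_reason == KVM_EXIT_DEBUG)
		printf("trap: rip=0x%llx dr6=0x%llx\n",
		       (unsigned long long)run->debug.arch.pc,
		       (unsigned long long)run->debug.arch.dr6);
}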