OSDN Git Service

KVM: Add Directed EOI support to APIC emulation
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / arch / x86 / kvm / x86.c
index fe5474a..086f931 100644 (file)
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <trace/events/kvm.h>
+#undef TRACE_INCLUDE_FILE
+#define CREATE_TRACE_POINTS
+#include "trace.h"
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
+#include <asm/mce.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS                                              \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+#define KVM_MAX_MCE_BANKS 32
+#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
@@ -70,12 +79,13 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                                    struct kvm_cpuid_entry2 __user *entries);
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
-                                             u32 function, u32 index);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
 
+int ignore_msrs = 0;
+module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pf_fixed", VCPU_STAT(pf_fixed) },
        { "pf_guest", VCPU_STAT(pf_guest) },
@@ -176,16 +186,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
        ++vcpu->stat.pf_guest;
 
        if (vcpu->arch.exception.pending) {
-               if (vcpu->arch.exception.nr == PF_VECTOR) {
-                       printk(KERN_DEBUG "kvm: inject_page_fault:"
-                                       " double fault 0x%lx\n", addr);
-                       vcpu->arch.exception.nr = DF_VECTOR;
-                       vcpu->arch.exception.error_code = 0;
-               } else if (vcpu->arch.exception.nr == DF_VECTOR) {
+               switch(vcpu->arch.exception.nr) {
+               case DF_VECTOR:
                        /* triple fault -> shutdown */
                        set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+                       return;
+               case PF_VECTOR:
+                       vcpu->arch.exception.nr = DF_VECTOR;
+                       vcpu->arch.exception.error_code = 0;
+                       return;
+               default:
+                       /* replace previous exception with a new one in a hope
+                          that instruction re-execution will regenerate lost
+                          exception */
+                       vcpu->arch.exception.pending = false;
+                       break;
                }
-               return;
        }
        vcpu->arch.cr2 = addr;
        kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
@@ -232,7 +248,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
                goto out;
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-               if (is_present_pte(pdpte[i]) &&
+               if (is_present_gpte(pdpte[i]) &&
                    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
                        ret = 0;
                        goto out;
@@ -241,6 +257,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
        ret = 1;
 
        memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_avail);
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_dirty);
 out:
 
        return ret;
@@ -256,6 +276,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
        if (is_long_mode(vcpu) || !is_pae(vcpu))
                return false;
 
+       if (!test_bit(VCPU_EXREG_PDPTR,
+                     (unsigned long *)&vcpu->arch.regs_avail))
+               return true;
+
        r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
        if (r < 0)
                goto out;
@@ -328,9 +352,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
        kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
-       KVMTRACE_1D(LMSW, vcpu,
-                   (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
-                   handler);
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
@@ -466,7 +487,7 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
@@ -644,8 +665,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
-       kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
-                         &vcpu->hv_clock.tsc_timestamp);
+       kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
        ktime_get_ts(&ts);
        local_irq_restore(flags);
 
@@ -704,11 +724,48 @@ static bool msr_mtrr_valid(unsigned msr)
        return false;
 }
 
+static bool valid_pat_type(unsigned t)
+{
+       return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+       return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
+}
+
+static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+       int i;
+
+       if (!msr_mtrr_valid(msr))
+               return false;
+
+       if (msr == MSR_IA32_CR_PAT) {
+               for (i = 0; i < 8; i++)
+                       if (!valid_pat_type((data >> (i * 8)) & 0xff))
+                               return false;
+               return true;
+       } else if (msr == MSR_MTRRdefType) {
+               if (data & ~0xcff)
+                       return false;
+               return valid_mtrr_type(data & 0xff);
+       } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+               for (i = 0; i < 8 ; i++)
+                       if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+                               return false;
+               return true;
+       }
+
+       /* variable MTRRs */
+       return valid_mtrr_type(data & 0xff);
+}
+
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
 
-       if (!msr_mtrr_valid(msr))
+       if (!mtrr_valid(vcpu, msr, data))
                return 1;
 
        if (msr == MSR_MTRRdefType) {
@@ -741,23 +798,53 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        return 0;
 }
 
+static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
+
+       switch (msr) {
+       case MSR_IA32_MCG_STATUS:
+               vcpu->arch.mcg_status = data;
+               break;
+       case MSR_IA32_MCG_CTL:
+               if (!(mcg_cap & MCG_CTL_P))
+                       return 1;
+               if (data != 0 && data != ~(u64)0)
+                       return -1;
+               vcpu->arch.mcg_ctl = data;
+               break;
+       default:
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+                       /* only 0 or all 1s can be written to IA32_MCi_CTL */
+                       if ((offset & 0x3) == 0 &&
+                           data != 0 && data != ~(u64)0)
+                               return -1;
+                       vcpu->arch.mce_banks[offset] = data;
+                       break;
+               }
+               return 1;
+       }
+       return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        switch (msr) {
        case MSR_EFER:
                set_efer(vcpu, data);
                break;
-       case MSR_IA32_MC0_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-                      __func__, data);
-               break;
-       case MSR_IA32_MCG_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-                       __func__, data);
+       case MSR_K7_HWCR:
+               data &= ~(u64)0x40;     /* ignore flush filter disable */
+               if (data != 0) {
+                       pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
+                               data);
+                       return 1;
+               }
                break;
-       case MSR_IA32_MCG_CTL:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
-                       __func__, data);
+       case MSR_AMD64_NB_CFG:
                break;
        case MSR_IA32_DEBUGCTLMSR:
                if (!data) {
@@ -813,9 +900,50 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                kvm_request_guest_time_update(vcpu);
                break;
        }
+       case MSR_IA32_MCG_CTL:
+       case MSR_IA32_MCG_STATUS:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+               return set_msr_mce(vcpu, msr, data);
+
+       /* Performance counters are not protected by a CPUID bit,
+        * so we should check all of them in the generic path for the sake of
+        * cross vendor migration.
+        * Writing a zero into the event select MSRs disables them,
+        * which we perfectly emulate ;-). Any other value should be at least
+        * reported, some guests depend on them.
+        */
+       case MSR_P6_EVNTSEL0:
+       case MSR_P6_EVNTSEL1:
+       case MSR_K7_EVNTSEL0:
+       case MSR_K7_EVNTSEL1:
+       case MSR_K7_EVNTSEL2:
+       case MSR_K7_EVNTSEL3:
+               if (data != 0)
+                       pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+                               "0x%x data 0x%llx\n", msr, data);
+               break;
+       /* at least RHEL 4 unconditionally writes to the perfctr registers,
+        * so we ignore writes to make it happy.
+        */
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
+       case MSR_K7_PERFCTR0:
+       case MSR_K7_PERFCTR1:
+       case MSR_K7_PERFCTR2:
+       case MSR_K7_PERFCTR3:
+               pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+                       "0x%x data 0x%llx\n", msr, data);
+               break;
        default:
-               pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
-               return 1;
+               if (!ignore_msrs) {
+                       pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+                               msr, data);
+                       return 1;
+               } else {
+                       pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+                               msr, data);
+                       break;
+               }
        }
        return 0;
 }
@@ -868,26 +996,47 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        return 0;
 }
 
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
        u64 data;
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
 
        switch (msr) {
-       case 0xc0010010: /* SYSCFG */
-       case 0xc0010015: /* HWCR */
-       case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
-       case MSR_IA32_MC0_CTL:
-       case MSR_IA32_MCG_STATUS:
+               data = 0;
+               break;
        case MSR_IA32_MCG_CAP:
+               data = vcpu->arch.mcg_cap;
+               break;
        case MSR_IA32_MCG_CTL:
-       case MSR_IA32_MC0_MISC:
-       case MSR_IA32_MC0_MISC+4:
-       case MSR_IA32_MC0_MISC+8:
-       case MSR_IA32_MC0_MISC+12:
-       case MSR_IA32_MC0_MISC+16:
-       case MSR_IA32_MC0_MISC+20:
+               if (!(mcg_cap & MCG_CTL_P))
+                       return 1;
+               data = vcpu->arch.mcg_ctl;
+               break;
+       case MSR_IA32_MCG_STATUS:
+               data = vcpu->arch.mcg_status;
+               break;
+       default:
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+                       data = vcpu->arch.mce_banks[offset];
+                       break;
+               }
+               return 1;
+       }
+       *pdata = data;
+       return 0;
+}
+
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+       u64 data;
+
+       switch (msr) {
+       case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_EBL_CR_POWERON:
        case MSR_IA32_DEBUGCTLMSR:
@@ -895,10 +1044,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_LASTBRANCHTOIP:
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
+       case MSR_K8_SYSCFG:
+       case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
        case MSR_P6_EVNTSEL0:
        case MSR_P6_EVNTSEL1:
        case MSR_K7_EVNTSEL0:
+       case MSR_K8_INT_PENDING_MSG:
+       case MSR_AMD64_NB_CFG:
                data = 0;
                break;
        case MSR_MTRRcap:
@@ -930,9 +1083,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_KVM_SYSTEM_TIME:
                data = vcpu->arch.time;
                break;
+       case MSR_IA32_P5_MC_ADDR:
+       case MSR_IA32_P5_MC_TYPE:
+       case MSR_IA32_MCG_CAP:
+       case MSR_IA32_MCG_CTL:
+       case MSR_IA32_MCG_STATUS:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+               return get_msr_mce(vcpu, msr, pdata);
        default:
-               pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
-               return 1;
+               if (!ignore_msrs) {
+                       pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+                       return 1;
+               } else {
+                       pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
+                       data = 0;
+               }
+               break;
        }
        *pdata = data;
        return 0;
@@ -1031,6 +1197,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_REINJECT_CONTROL:
        case KVM_CAP_IRQ_INJECT_STATUS:
        case KVM_CAP_ASSIGN_DEV_IRQ:
+       case KVM_CAP_IRQFD:
+       case KVM_CAP_PIT2:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -1051,6 +1219,9 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IOMMU:
                r = iommu_found();
                break;
+       case KVM_CAP_MCE:
+               r = KVM_MAX_MCE_BANKS;
+               break;
        default:
                r = 0;
                break;
@@ -1079,14 +1250,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
                if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
                        goto out;
                r = -E2BIG;
-               if (n < num_msrs_to_save)
+               if (n < msr_list.nmsrs)
                        goto out;
                r = -EFAULT;
                if (copy_to_user(user_msr_list->indices, &msrs_to_save,
                                 num_msrs_to_save * sizeof(u32)))
                        goto out;
-               if (copy_to_user(user_msr_list->indices
-                                + num_msrs_to_save * sizeof(u32),
+               if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
                                 &emulated_msrs,
                                 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
                        goto out;
@@ -1111,6 +1281,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_X86_GET_MCE_CAP_SUPPORTED: {
+               u64 mce_cap;
+
+               mce_cap = KVM_MCE_CAP_SUPPORTED;
+               r = -EFAULT;
+               if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+                       goto out;
+               r = 0;
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -1191,6 +1371,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        vcpu->arch.cpuid_nent = cpuid->nent;
        cpuid_fix_nx_cap(vcpu);
        r = 0;
+       kvm_apic_set_version(vcpu);
 
 out_free:
        vfree(cpuid_entries);
@@ -1212,6 +1393,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
                goto out;
        vcpu->arch.cpuid_nent = cpuid->nent;
+       kvm_apic_set_version(vcpu);
        return 0;
 
 out:
@@ -1399,6 +1581,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
        for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
                do_cpuid_ent(&cpuid_entries[nent], func, 0,
                             &nent, cpuid->nent);
+       r = -E2BIG;
+       if (nent >= cpuid->nent)
+               goto out_free;
+
        r = -EFAULT;
        if (copy_to_user(entries, cpuid_entries,
                         nent * sizeof(struct kvm_cpuid_entry2)))
@@ -1467,6 +1653,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
+                                       u64 mcg_cap)
+{
+       int r;
+       unsigned bank_num = mcg_cap & 0xff, bank;
+
+       r = -EINVAL;
+       if (!bank_num)
+               goto out;
+       if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+               goto out;
+       r = 0;
+       vcpu->arch.mcg_cap = mcg_cap;
+       /* Init IA32_MCG_CTL to all 1s */
+       if (mcg_cap & MCG_CTL_P)
+               vcpu->arch.mcg_ctl = ~(u64)0;
+       /* Init IA32_MCi_CTL to all 1s */
+       for (bank = 0; bank < bank_num; bank++)
+               vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+out:
+       return r;
+}
+
+static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
+                                     struct kvm_x86_mce *mce)
+{
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
+       u64 *banks = vcpu->arch.mce_banks;
+
+       if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
+               return -EINVAL;
+       /*
+        * if IA32_MCG_CTL is not all 1s, the uncorrected error
+        * reporting is disabled
+        */
+       if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+           vcpu->arch.mcg_ctl != ~(u64)0)
+               return 0;
+       banks += 4 * mce->bank;
+       /*
+        * if IA32_MCi_CTL is not all 1s, the uncorrected error
+        * reporting is disabled for the bank
+        */
+       if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
+               return 0;
+       if (mce->status & MCI_STATUS_UC) {
+               if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
+                   !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+                       printk(KERN_DEBUG "kvm: set_mce: "
+                              "injects mce exception while "
+                              "previous one is in progress!\n");
+                       set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+                       return 0;
+               }
+               if (banks[1] & MCI_STATUS_VAL)
+                       mce->status |= MCI_STATUS_OVER;
+               banks[2] = mce->addr;
+               banks[3] = mce->misc;
+               vcpu->arch.mcg_status = mce->mcg_status;
+               banks[1] = mce->status;
+               kvm_queue_exception(vcpu, MC_VECTOR);
+       } else if (!(banks[1] & MCI_STATUS_VAL)
+                  || !(banks[1] & MCI_STATUS_UC)) {
+               if (banks[1] & MCI_STATUS_VAL)
+                       mce->status |= MCI_STATUS_OVER;
+               banks[2] = mce->addr;
+               banks[3] = mce->misc;
+               banks[1] = mce->status;
+       } else
+               banks[1] |= MCI_STATUS_OVER;
+       return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -1600,6 +1860,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
                break;
        }
+       case KVM_X86_SETUP_MCE: {
+               u64 mcg_cap;
+
+               r = -EFAULT;
+               if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+                       goto out;
+               r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
+               break;
+       }
+       case KVM_X86_SET_MCE: {
+               struct kvm_x86_mce mce;
+
+               r = -EFAULT;
+               if (copy_from_user(&mce, argp, sizeof mce))
+                       goto out;
+               r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -1739,19 +2017,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
+               spin_lock(&pic_irqchip(kvm)->lock);
                memcpy(&pic_irqchip(kvm)->pics[0],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
+               spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
+               spin_lock(&pic_irqchip(kvm)->lock);
                memcpy(&pic_irqchip(kvm)->pics[1],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
+               spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
+               mutex_lock(&kvm->irq_lock);
                memcpy(ioapic_irqchip(kvm),
                        &chip->chip.ioapic,
                        sizeof(struct kvm_ioapic_state));
+               mutex_unlock(&kvm->irq_lock);
                break;
        default:
                r = -EINVAL;
@@ -1765,7 +2049,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
        int r = 0;
 
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
 
@@ -1773,8 +2059,10 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
        int r = 0;
 
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
        kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
 
@@ -1783,7 +2071,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 {
        if (!kvm->arch.vpit)
                return -ENXIO;
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
        kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return 0;
 }
 
@@ -1834,6 +2124,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
        union {
                struct kvm_pit_state ps;
                struct kvm_memory_alias alias;
+               struct kvm_pit_config pit_config;
        } u;
 
        switch (ioctl) {
@@ -1894,16 +2185,24 @@ long kvm_arch_vm_ioctl(struct file *filp,
                }
                break;
        case KVM_CREATE_PIT:
-               mutex_lock(&kvm->lock);
+               u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
+               goto create_pit;
+       case KVM_CREATE_PIT2:
+               r = -EFAULT;
+               if (copy_from_user(&u.pit_config, argp,
+                                  sizeof(struct kvm_pit_config)))
+                       goto out;
+       create_pit:
+               down_write(&kvm->slots_lock);
                r = -EEXIST;
                if (kvm->arch.vpit)
                        goto create_pit_unlock;
                r = -ENOMEM;
-               kvm->arch.vpit = kvm_create_pit(kvm);
+               kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
                if (kvm->arch.vpit)
                        r = 0;
        create_pit_unlock:
-               mutex_unlock(&kvm->lock);
+               up_write(&kvm->slots_lock);
                break;
        case KVM_IRQ_LINE_STATUS:
        case KVM_IRQ_LINE: {
@@ -1914,10 +2213,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        __s32 status;
-                       mutex_lock(&kvm->lock);
+                       mutex_lock(&kvm->irq_lock);
                        status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                        irq_event.irq, irq_event.level);
-                       mutex_unlock(&kvm->lock);
+                       mutex_unlock(&kvm->irq_lock);
                        if (ioctl == KVM_IRQ_LINE_STATUS) {
                                irq_event.status = status;
                                if (copy_to_user(argp, &irq_event,
@@ -2039,35 +2338,23 @@ static void kvm_init_msr_list(void)
        num_msrs_to_save = j;
 }
 
-/*
- * Only apic need an MMIO device hook, so shortcut now..
- */
-static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
-                                               gpa_t addr, int len,
-                                               int is_write)
+static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
+                          const void *v)
 {
-       struct kvm_io_device *dev;
+       if (vcpu->arch.apic &&
+           !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
+               return 0;
 
-       if (vcpu->arch.apic) {
-               dev = &vcpu->arch.apic->dev;
-               if (dev->in_range(dev, addr, len, is_write))
-                       return dev;
-       }
-       return NULL;
+       return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
 }
 
-
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-                                               gpa_t addr, int len,
-                                               int is_write)
+static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 {
-       struct kvm_io_device *dev;
+       if (vcpu->arch.apic &&
+           !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
+               return 0;
 
-       dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
-       if (dev == NULL)
-               dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
-                                         is_write);
-       return dev;
+       return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
 }
 
 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
@@ -2136,11 +2423,12 @@ static int emulator_read_emulated(unsigned long addr,
                                  unsigned int bytes,
                                  struct kvm_vcpu *vcpu)
 {
-       struct kvm_io_device *mmio_dev;
        gpa_t                 gpa;
 
        if (vcpu->mmio_read_completed) {
                memcpy(val, vcpu->mmio_data, bytes);
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+                              vcpu->mmio_phys_addr, *(u64 *)val);
                vcpu->mmio_read_completed = 0;
                return X86EMUL_CONTINUE;
        }
@@ -2161,14 +2449,12 @@ mmio:
        /*
         * Is this MMIO handled locally?
         */
-       mutex_lock(&vcpu->kvm->lock);
-       mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
-       if (mmio_dev) {
-               kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-               mutex_unlock(&vcpu->kvm->lock);
+       if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
                return X86EMUL_CONTINUE;
        }
-       mutex_unlock(&vcpu->kvm->lock);
+
+       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
@@ -2195,7 +2481,6 @@ static int emulator_write_emulated_onepage(unsigned long addr,
                                           unsigned int bytes,
                                           struct kvm_vcpu *vcpu)
 {
-       struct kvm_io_device *mmio_dev;
        gpa_t                 gpa;
 
        gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
@@ -2213,17 +2498,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
                return X86EMUL_CONTINUE;
 
 mmio:
+       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
        /*
         * Is this MMIO handled locally?
         */
-       mutex_lock(&vcpu->kvm->lock);
-       mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
-       if (mmio_dev) {
-               kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-               mutex_unlock(&vcpu->kvm->lock);
+       if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
                return X86EMUL_CONTINUE;
-       }
-       mutex_unlock(&vcpu->kvm->lock);
 
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
@@ -2312,7 +2592,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-       KVMTRACE_0D(CLTS, vcpu, handler);
        kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
        return X86EMUL_CONTINUE;
 }
@@ -2413,14 +2692,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 
                r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
-               /* Reject the instructions other than VMCALL/VMMCALL when
-                * try to emulate invalid opcode */
+               /* Only allow emulation of specific instructions on #UD
+                * (namely VMMCALL, sysenter, sysexit, syscall)*/
                c = &vcpu->arch.emulate_ctxt.decode;
-               if ((emulation_type & EMULTYPE_TRAP_UD) &&
-                   (!(c->twobyte && c->b == 0x01 &&
-                     (c->modrm_reg == 0 || c->modrm_reg == 3) &&
-                      c->modrm_mod == 3 && c->modrm_rm == 1)))
-                       return EMULATE_FAIL;
+               if (emulation_type & EMULTYPE_TRAP_UD) {
+                       if (!c->twobyte)
+                               return EMULATE_FAIL;
+                       switch (c->b) {
+                       case 0x01: /* VMMCALL */
+                               if (c->modrm_mod != 3 || c->modrm_rm != 1)
+                                       return EMULATE_FAIL;
+                               break;
+                       case 0x34: /* sysenter */
+                       case 0x35: /* sysexit */
+                               if (c->modrm_mod != 0 || c->modrm_rm != 0)
+                                       return EMULATE_FAIL;
+                               break;
+                       case 0x05: /* syscall */
+                               if (c->modrm_mod != 0 || c->modrm_rm != 0)
+                                       return EMULATE_FAIL;
+                               break;
+                       default:
+                               return EMULATE_FAIL;
+                       }
+
+                       if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
+                               return EMULATE_FAIL;
+               }
 
                ++vcpu->stat.insn_emulation;
                if (r)  {
@@ -2540,52 +2838,40 @@ int complete_pio(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static void kernel_pio(struct kvm_io_device *pio_dev,
-                      struct kvm_vcpu *vcpu,
-                      void *pd)
+static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
        /* TODO: String I/O for in kernel device */
+       int r;
 
-       mutex_lock(&vcpu->kvm->lock);
        if (vcpu->arch.pio.in)
-               kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
-                                 vcpu->arch.pio.size,
-                                 pd);
+               r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+                                   vcpu->arch.pio.size, pd);
        else
-               kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
-                                  vcpu->arch.pio.size,
-                                  pd);
-       mutex_unlock(&vcpu->kvm->lock);
+               r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+                                    vcpu->arch.pio.size, pd);
+       return r;
 }
 
-static void pio_string_write(struct kvm_io_device *pio_dev,
-                            struct kvm_vcpu *vcpu)
+static int pio_string_write(struct kvm_vcpu *vcpu)
 {
        struct kvm_pio_request *io = &vcpu->arch.pio;
        void *pd = vcpu->arch.pio_data;
-       int i;
+       int i, r = 0;
 
-       mutex_lock(&vcpu->kvm->lock);
        for (i = 0; i < io->cur_count; i++) {
-               kvm_iodevice_write(pio_dev, io->port,
-                                  io->size,
-                                  pd);
+               if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+                                    io->port, io->size, pd)) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
                pd += io->size;
        }
-       mutex_unlock(&vcpu->kvm->lock);
-}
-
-static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
-                                              gpa_t addr, int len,
-                                              int is_write)
-{
-       return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
+       return r;
 }
 
 int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                  int size, unsigned port)
 {
-       struct kvm_io_device *pio_dev;
        unsigned long val;
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2599,19 +2885,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        vcpu->arch.pio.down = 0;
        vcpu->arch.pio.rep = 0;
 
-       if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-               KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
-       else
-               KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
+       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+                     size, 1);
 
        val = kvm_register_read(vcpu, VCPU_REGS_RAX);
        memcpy(vcpu->arch.pio_data, &val, 4);
 
-       pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
-       if (pio_dev) {
-               kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
+       if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
                complete_pio(vcpu);
                return 1;
        }
@@ -2625,7 +2905,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 {
        unsigned now, in_page;
        int ret = 0;
-       struct kvm_io_device *pio_dev;
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -2638,12 +2917,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        vcpu->arch.pio.down = down;
        vcpu->arch.pio.rep = rep;
 
-       if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-               KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
-       else
-               KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
+       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+                     size, count);
 
        if (!count) {
                kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -2673,9 +2948,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
        vcpu->arch.pio.guest_gva = address;
 
-       pio_dev = vcpu_find_pio_dev(vcpu, port,
-                                   vcpu->arch.pio.cur_count,
-                                   !vcpu->arch.pio.in);
        if (!vcpu->arch.pio.in) {
                /* string PIO write */
                ret = pio_copy_data(vcpu);
@@ -2683,16 +2955,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                        kvm_inject_gp(vcpu, 0);
                        return 1;
                }
-               if (ret == 0 && pio_dev) {
-                       pio_string_write(pio_dev, vcpu);
+               if (ret == 0 && !pio_string_write(vcpu)) {
                        complete_pio(vcpu);
                        if (vcpu->arch.pio.count == 0)
                                ret = 1;
                }
-       } else if (pio_dev)
-               pr_unimpl(vcpu, "no string pio read support yet, "
-                      "port %x size %d count %ld\n",
-                       port, size, count);
+       }
+       /* no string PIO read support yet */
 
        return ret;
 }
@@ -2725,10 +2994,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
-               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                       vcpu = kvm->vcpus[i];
-                       if (!vcpu)
-                               continue;
+               kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->cpu != freq->cpu)
                                continue;
                        if (!kvm_request_guest_time_update(vcpu))
@@ -2821,7 +3087,6 @@ void kvm_arch_exit(void)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
-       KVMTRACE_0D(HLT, vcpu, handler);
        if (irqchip_in_kernel(vcpu->kvm)) {
                vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
                return 1;
@@ -2852,7 +3117,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
        a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
 
-       KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+       trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
        if (!is_long_mode(vcpu)) {
                nr &= 0xFFFFFFFF;
@@ -2952,8 +3217,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
                return 0;
        }
-       KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
-                   (u32)((u64)value >> 32), handler);
 
        return value;
 }
@@ -2961,9 +3224,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                     unsigned long *rflags)
 {
-       KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
-                   (u32)((u64)val >> 32), handler);
-
        switch (cr) {
        case 0:
                kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -3073,11 +3333,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
                kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
        }
        kvm_x86_ops->skip_emulated_instruction(vcpu);
-       KVMTRACE_5D(CPUID, vcpu, function,
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
+       trace_kvm_cpuid(function,
+                       kvm_register_read(vcpu, VCPU_REGS_RAX),
+                       kvm_register_read(vcpu, VCPU_REGS_RBX),
+                       kvm_register_read(vcpu, VCPU_REGS_RCX),
+                       kvm_register_read(vcpu, VCPU_REGS_RDX));
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -3158,9 +3418,6 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 
 static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-
        /* try to reinject previous events if any */
        if (vcpu->arch.nmi_injected) {
                kvm_x86_ops->set_nmi(vcpu);
@@ -3276,7 +3533,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                set_debugreg(vcpu->arch.eff_db[3], 3);
        }
 
-       KVMTRACE_0D(VMENTRY, vcpu, entryexit);
+       trace_kvm_entry(vcpu->vcpu_id);
        kvm_x86_ops->run(vcpu, kvm_run);
 
        if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -4112,7 +4369,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
        /* Older userspace won't unhalt the vcpu on reset. */
-       if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+       if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
            sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
            !(vcpu->arch.cr0 & X86_CR0_PE))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -4383,7 +4640,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        kvm = vcpu->kvm;
 
        vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-       if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        else
                vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -4405,6 +4662,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                        goto fail_mmu_destroy;
        }
 
+       vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+                                      GFP_KERNEL);
+       if (!vcpu->arch.mce_banks) {
+               r = -ENOMEM;
+               goto fail_mmu_destroy;
+       }
+       vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+
        return 0;
 
 fail_mmu_destroy:
@@ -4452,20 +4717,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 static void kvm_free_vcpus(struct kvm *kvm)
 {
        unsigned int i;
+       struct kvm_vcpu *vcpu;
 
        /*
         * Unpin any mmu pages first.
         */
-       for (i = 0; i < KVM_MAX_VCPUS; ++i)
-               if (kvm->vcpus[i])
-                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_arch_vcpu_free(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_unload_vcpu_mmu(vcpu);
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_arch_vcpu_free(vcpu);
+
+       mutex_lock(&kvm->lock);
+       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+               kvm->vcpus[i] = NULL;
 
+       atomic_set(&kvm->online_vcpus, 0);
+       mutex_unlock(&kvm->lock);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -4581,3 +4848,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
        return kvm_x86_ops->interrupt_allowed(vcpu);
 }
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);