/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm_int.h"
#include "sysemu/kvm_xen.h"
#include "kvm/kvm_i386.h"
#include "exec/address-spaces.h"
#include "xen-emu.h"
#include "trace.h"
#include "sysemu/runstate.h"

#include "hw/pci/msi.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/kvm/xen_overlay.h"
#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/kvm/xen_gnttab.h"

#include "hw/xen/interface/version.h"
#include "hw/xen/interface/sched.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/hvm_op.h"
#include "hw/xen/interface/hvm/params.h"
#include "hw/xen/interface/vcpu.h"
#include "hw/xen/interface/event_channel.h"

#include "xen-compat.h"
#ifdef TARGET_X86_64
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif
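
/*
 * A 64-bit build can still run a guest that is not in long mode, and such
 * a guest lays out hypercall structures with 4-byte longs and pointers.
 * Handlers for those hypercalls check hypercall_compat32() and, when it
 * is true, copy in a compat_* layout and widen the fields by hand (see
 * do_add_to_physmap(), where the compat struct is asserted to be 16 bytes).
 */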
static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
                           size_t *len, bool is_write)
{
    struct kvm_translation tr = {
        .linear_address = gva,
    };

    if (len) {
        *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
    }

    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
        (is_write && !tr.writeable)) {
        return false;
    }
    *gpa = tr.physical_address;
    return true;
}
static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
                      bool is_write)
{
    uint8_t *buf = (uint8_t *)_buf;
    uint64_t gpa;
    size_t len;

    while (sz) {
        if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
            return -EFAULT;
        }
        if (len > sz) {
            len = sz;
        }

        cpu_physical_memory_rw(gpa, buf, len, is_write);

        buf += len;
        sz -= len;
        gva += len;
    }

    return 0;
}
static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}

static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}
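
/*
 * These two helpers are what the hypercall handlers use to read and write
 * guest memory by virtual address: each page is translated separately via
 * KVM_TRANSLATE (with the write-permission check above), so a single copy
 * may span a page boundary in GVA space while being discontiguous in GPA
 * space.
 */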
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    /* If called a second time, don't repeat the rest of the setup. */
    if (s->xen_caps) {
        return 0;
    }

    /*
     * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
     * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
     *
     * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
     * such things to be polled at precisely the right time. We *could* do
     * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
     * the moment the IRQ is acked, and see if it should be reasserted.
     *
     * But the in-kernel irqchip is deprecated, so we're unlikely to add
     * that support in the kernel. Insist on using the split irqchip mode
     * instead.
     *
     * This leaves us polling for the level going low in QEMU, which lacks
     * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
     * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
     * the device (for which it has to unmap the device and trap access, for
     * some period after an IRQ!!). In the Xen case, we do it on exit from
     * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
     * Which is kind of icky, but less so than the VFIO one. I may fix them
     * both later...
     */
    if (!kvm_kernel_irqchip_split()) {
        error_report("kvm: Xen support requires kernel-irqchip=split");
        return -EINVAL;
    }

    s->xen_caps = xen_caps;
    return 0;
}
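
/*
 * Note: kvm_xen_init() runs when the KVM accelerator is configured with a
 * nonzero xen-version property; something like
 * "-accel kvm,xen-version=0x4000a,kernel-irqchip=split" (the value here is
 * illustrative, not taken from this file) turns this support on.
 */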
int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;

    return 0;
}
uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}
static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                                      int cmd, uint64_t arg)
{
    int err = 0;

    switch (cmd) {
    case XENVER_get_features: {
        struct xen_feature_info fi;

        /* No need for 32/64 compat handling */
        qemu_build_assert(sizeof(fi) == 8);

        err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
        if (err) {
            break;
        }

        fi.submap = 0;
        if (fi.submap_idx == 0) {
            fi.submap |= 1 << XENFEAT_writable_page_tables |
                         1 << XENFEAT_writable_descriptor_tables |
                         1 << XENFEAT_auto_translated_physmap |
                         1 << XENFEAT_supervisor_mode_kernel |
                         1 << XENFEAT_hvm_callback_vector;
        }

        err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
        break;
    }

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
{
    struct kvm_xen_vcpu_attr xhsi;

    xhsi.type = type;
    xhsi.u.gpa = gpa;

    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
}
static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
{
    uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    struct kvm_xen_vcpu_attr xva;

    xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
    xva.u.vector = vector;

    trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);

    /*
     * The upcall vector is a per-vCPU attribute, so it is set with the
     * KVM_XEN_VCPU_SET_ATTR vcpu ioctl, not the VM-wide
     * KVM_XEN_HVM_SET_ATTR.
     */
    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
}
static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_callback_vector = data.host_int;

    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);
    }
}
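
/*
 * All the do_set_*() helpers in this file run on the target vCPU's own
 * thread via (async_)run_on_cpu(), so every write to the env->xen_*
 * fields happens on the owning thread and the per-vCPU state needs no
 * extra locking.
 */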
static int set_vcpu_info(CPUState *cs, uint64_t gpa)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    MemoryRegionSection mrs = { .mr = NULL };
    void *vcpu_info_hva = NULL;
    int ret;

    ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
    if (ret || gpa == INVALID_GPA) {
        goto out;
    }

    mrs = memory_region_find(get_system_memory(), gpa,
                             sizeof(struct vcpu_info));
    if (mrs.mr && mrs.mr->ram_block &&
        !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
        vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
                                         mrs.offset_within_region);
    }
    if (!vcpu_info_hva) {
        if (mrs.mr) {
            memory_region_unref(mrs.mr);
            mrs.mr = NULL;
        }
        ret = -EINVAL;
    }

 out:
    if (env->xen_vcpu_info_mr) {
        memory_region_unref(env->xen_vcpu_info_mr);
    }
    env->xen_vcpu_info_hva = vcpu_info_hva;
    env->xen_vcpu_info_mr = mrs.mr;
    return ret;
}
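
/*
 * Besides telling the kernel about the vcpu_info GPA, set_vcpu_info()
 * caches a host virtual mapping of the structure. The fast path in
 * kvm_xen_maybe_deassert_callback() below reads evtchn_upcall_pending
 * through that pointer on every return from KVM_RUN, so it cannot
 * afford a memory_region_find() each time.
 */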
static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_default_gpa = data.host_ulong;

    /* Changing the default does nothing if a vcpu_info was explicitly set. */
    if (env->xen_vcpu_info_gpa == INVALID_GPA) {
        set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
    }
}
static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = data.host_ulong;

    set_vcpu_info(cs, env->xen_vcpu_info_gpa);
}
void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);

    if (!cs) {
        return NULL;
    }

    return X86_CPU(cs)->env.xen_vcpu_info_hva;
}
void kvm_xen_maybe_deassert_callback(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    struct vcpu_info *vi = env->xen_vcpu_info_hva;

    if (!vi) {
        return;
    }

    /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
    if (!vi->evtchn_upcall_pending) {
        qemu_mutex_lock_iothread();
        /*
         * Check again now we have the lock, because it may have been
         * asserted in the interim. And we don't want to take the lock
         * every time because this is a fast path.
         */
        if (!vi->evtchn_upcall_pending) {
            X86_CPU(cs)->env.xen_callback_asserted = false;
            xen_evtchn_set_callback_level(0);
        }
        qemu_mutex_unlock_iothread();
    }
}
void kvm_xen_set_callback_asserted(void)
{
    CPUState *cs = qemu_get_cpu(0);

    if (cs) {
        X86_CPU(cs)->env.xen_callback_asserted = true;
    }
}
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    uint8_t vector;

    if (!cs) {
        return;
    }

    vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    if (vector) {
        /*
         * The per-vCPU callback vector injected via lapic. Just
         * deliver it as an MSI.
         */
        MSIMessage msg = {
            .address = APIC_DEFAULT_ADDRESS |
                       (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
            .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
        };
        kvm_irqchip_send_msi(kvm_state, msg);
        return;
    }

    switch (type) {
    case HVM_PARAM_CALLBACK_TYPE_VECTOR:
        /*
         * If the evtchn_upcall_pending field in the vcpu_info is set, then
         * KVM will automatically deliver the vector on entering the vCPU
         * so all we have to do is kick it out.
         */
        qemu_cpu_kick(cs);
        break;

    case HVM_PARAM_CALLBACK_TYPE_GSI:
    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
        if (vcpu_id == 0) {
            xen_evtchn_set_callback_level(1);
        }
        break;
    }
}
static int kvm_xen_set_vcpu_timer(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    struct kvm_xen_vcpu_attr va = {
        .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        .u.timer.port = env->xen_virq[VIRQ_TIMER],
        .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
        .u.timer.expires_ns = env->xen_singleshot_timer_ns,
    };

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
}

static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
{
    kvm_xen_set_vcpu_timer(cs);
}
int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);

    if (!cs) {
        return -ENOENT;
    }

    /* cpu.h doesn't include the actual Xen header. */
    qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);

    if (virq >= NR_VIRQS) {
        return -EINVAL;
    }

    if (port && X86_CPU(cs)->env.xen_virq[virq]) {
        return -EEXIST;
    }

    X86_CPU(cs)->env.xen_virq[virq] = port;
    if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
        async_run_on_cpu(cs, do_set_vcpu_timer_virq,
                         RUN_ON_CPU_HOST_INT(port));
    }
    return 0;
}
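
/*
 * The port bound to each VIRQ is remembered in env->xen_virq[]; for
 * VIRQ_TIMER it is also pushed to the kernel, which needs to know the
 * port on which its accelerated timer should raise events.
 */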
static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_time_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          env->xen_vcpu_time_info_gpa);
}

static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_runstate_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          env->xen_vcpu_runstate_gpa);
}
static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;
    env->xen_vcpu_callback_vector = 0;
    env->xen_singleshot_timer_ns = 0;
    memset(env->xen_virq, 0, sizeof(env->xen_virq));

    set_vcpu_info(cs, INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          INVALID_GPA);
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);
        kvm_xen_set_vcpu_timer(cs);
    }
}
static int xen_set_shared_info(uint64_t gfn)
{
    uint64_t gpa = gfn << TARGET_PAGE_BITS;
    int i, err;

    QEMU_IOTHREAD_LOCK_GUARD();

    /*
     * The xen_overlay device tells KVM about it too, since it had to
     * do that on migration load anyway (unless we're going to jump
     * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific anyway).
     */
    err = xen_overlay_map_shinfo_page(gpa);
    if (err) {
        return err;
    }

    trace_kvm_xen_set_shared_info(gfn);

    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
        CPUState *cpu = qemu_get_cpu(i);
        if (cpu) {
            async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
                             RUN_ON_CPU_HOST_ULONG(gpa));
        }
        gpa += sizeof(vcpu_info_t);
    }

    return err;
}
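
/*
 * The first XEN_LEGACY_MAX_VCPUS vCPUs get a default vcpu_info embedded
 * in the shared_info page itself; it remains in use only until the guest
 * registers an explicit one with VCPUOP_register_vcpu_info (compare
 * do_set_vcpu_info_default_gpa() above).
 */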
static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
{
    switch (space) {
    case XENMAPSPACE_shared_info:
        if (idx > 0) {
            return -EINVAL;
        }
        return xen_set_shared_info(gfn);

    case XENMAPSPACE_grant_table:
        return xen_gnttab_map_page(idx, gfn);

    case XENMAPSPACE_gmfn:
    case XENMAPSPACE_gmfn_range:
        return -ENOTSUP;

    case XENMAPSPACE_gmfn_foreign:
    case XENMAPSPACE_dev_mmio:
        return -EPERM;

    default:
        return -EINVAL;
    }
}
static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    struct xen_add_to_physmap xatp;
    CPUState *cs = CPU(cpu);

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap xatp32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
        if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
            return -EFAULT;
        }
        xatp.domid = xatp32.domid;
        xatp.size = xatp32.size;
        xatp.space = xatp32.space;
        xatp.idx = xatp32.idx;
        xatp.gpfn = xatp32.gpfn;
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
            return -EFAULT;
        }
    }

    if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
        return -ESRCH;
    }

    return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
}
static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   uint64_t arg)
{
    struct xen_add_to_physmap_batch xatpb;
    unsigned long idxs_gva, gpfns_gva, errs_gva;
    CPUState *cs = CPU(cpu);
    size_t op_sz;

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap_batch xatpb32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
            return -EFAULT;
        }
        xatpb.domid = xatpb32.domid;
        xatpb.space = xatpb32.space;
        xatpb.size = xatpb32.size;

        idxs_gva = xatpb32.idxs.c;
        gpfns_gva = xatpb32.gpfns.c;
        errs_gva = xatpb32.errs.c;
        op_sz = sizeof(uint32_t);
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
            return -EFAULT;
        }
        op_sz = sizeof(unsigned long);
        idxs_gva = (unsigned long)xatpb.idxs.p;
        gpfns_gva = (unsigned long)xatpb.gpfns.p;
        errs_gva = (unsigned long)xatpb.errs.p;
    }

    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
        return -ESRCH;
    }

    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
    if (xatpb.space == XENMAPSPACE_gmfn_range) {
        return -EINVAL;
    }

    while (xatpb.size--) {
        unsigned long idx = 0;
        unsigned long gpfn = 0;
        int err;

        /* For 32-bit compat this only copies the low 32 bits of each */
        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
            return -EFAULT;
        }
        idxs_gva += op_sz;
        gpfns_gva += op_sz;

        err = add_to_physmap_one(xatpb.space, idx, gpfn);

        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
            return -EFAULT;
        }
        errs_gva += sizeof(err);
    }
    return 0;
}
static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    int err;

    switch (cmd) {
    case XENMEM_add_to_physmap:
        err = do_add_to_physmap(exit, cpu, arg);
        break;

    case XENMEM_add_to_physmap_batch:
        err = do_add_to_physmap_batch(exit, cpu, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    struct xen_hvm_param hp;
    int err = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(hp) == 16);

    if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
        goto out;
    }

    if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
        err = -ESRCH;
        goto out;
    }

    switch (hp.index) {
    case HVM_PARAM_CALLBACK_IRQ:
        qemu_mutex_lock_iothread();
        err = xen_evtchn_set_callback_param(hp.value);
        qemu_mutex_unlock_iothread();
        xen_set_long_mode(exit->u.hcall.longmode);
        break;
    default:
        return false;
    }

out:
    exit->u.hcall.result = err;
    return true;
}
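
/*
 * The HVM_PARAM_CALLBACK_IRQ value encodes the delivery method (GSI,
 * PCI INTx or per-vCPU vector) in its top byte; decoding it is left to
 * xen_evtchn_set_callback_param(). Since a guest normally sets this
 * parameter once, early in boot, it is also a convenient point to latch
 * the guest's long mode for the benefit of migration.
 */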
static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
                                              X86CPU *cpu, uint64_t arg)
{
    struct xen_hvm_evtchn_upcall_vector up;
    CPUState *target_cs;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(up) == 8);

    if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
        return -EFAULT;
    }

    if (up.vector < 0x10) {
        return -EINVAL; /* Vectors 0-0xf are reserved for exceptions. */
    }

    target_cs = qemu_get_cpu(up.vcpu);
    if (!target_cs) {
        return -EINVAL;
    }

    async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
                     RUN_ON_CPU_HOST_INT(up.vector));
    return 0;
}
static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                 int cmd, uint64_t arg)
{
    int ret = -ENOSYS;

    switch (cmd) {
    case HVMOP_set_evtchn_upcall_vector:
        ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
                                                 exit->u.hcall.params[0]);
        break;

    case HVMOP_pagetable_dying:
        ret = -ENOSYS;
        break;

    case HVMOP_set_param:
        return handle_set_param(exit, cpu, arg);

    default:
        return false;
    }

    exit->u.hcall.result = ret;
    return true;
}
static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_register_vcpu_info rvi;
    uint64_t gpa;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rvi) == 16);
    qemu_build_assert(sizeof(struct vcpu_info) == 64);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
        return -EFAULT;
    }

    if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
        return -EINVAL;
    }

    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}
static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
                                          uint64_t arg)
{
    struct vcpu_register_time_memory_area tma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(tma) == 8);
    qemu_build_assert(sizeof(struct vcpu_time_info) == 32);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
        return -EFAULT;
    }

    /*
     * Xen actually uses the GVA and does the translation through the guest
     * page tables each time. But Linux/KVM uses the GPA, on the assumption
     * that guests only ever use *global* addresses (kernel virtual addresses)
     * for it. If Linux is changed to redo the GVA→GPA translation each time,
     * it will offer a new vCPU attribute for that, and we'll use it instead.
     */
    if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
        len < sizeof(struct vcpu_time_info)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}
static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
                                         uint64_t arg)
{
    struct vcpu_register_runstate_memory_area rma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rma) == 8);
    /* The runstate area actually does change size, but Linux copes. */

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
        return -EFAULT;
    }

    /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
    if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}
static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                  int cmd, int vcpu_id, uint64_t arg)
{
    CPUState *dest = qemu_get_cpu(vcpu_id);
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case VCPUOP_register_runstate_memory_area:
        err = vcpuop_register_runstate_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_time_memory_area:
        err = vcpuop_register_vcpu_time_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_info:
        err = vcpuop_register_vcpu_info(cs, dest, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case EVTCHNOP_init_control:
    case EVTCHNOP_expand_array:
    case EVTCHNOP_set_priority:
        /* We do not support FIFO channels at this point */
        err = -ENOSYS;
        break;

    case EVTCHNOP_status: {
        struct evtchn_status status;

        qemu_build_assert(sizeof(status) == 24);
        if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_status_op(&status);
        if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_close: {
        struct evtchn_close close;

        qemu_build_assert(sizeof(close) == 4);
        if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_close_op(&close);
        break;
    }
    case EVTCHNOP_unmask: {
        struct evtchn_unmask unmask;

        qemu_build_assert(sizeof(unmask) == 4);
        if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_unmask_op(&unmask);
        break;
    }
    case EVTCHNOP_bind_virq: {
        struct evtchn_bind_virq virq;

        qemu_build_assert(sizeof(virq) == 12);
        if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_virq_op(&virq);
        if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_ipi: {
        struct evtchn_bind_ipi ipi;

        qemu_build_assert(sizeof(ipi) == 8);
        if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_ipi_op(&ipi);
        if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_send: {
        struct evtchn_send send;

        qemu_build_assert(sizeof(send) == 4);
        if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_send_op(&send);
        break;
    }
    case EVTCHNOP_alloc_unbound: {
        struct evtchn_alloc_unbound alloc;

        qemu_build_assert(sizeof(alloc) == 8);
        if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_alloc_unbound_op(&alloc);
        if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_interdomain: {
        struct evtchn_bind_interdomain interdomain;

        qemu_build_assert(sizeof(interdomain) == 12);
        if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_interdomain_op(&interdomain);
        if (!err &&
            kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_vcpu: {
        struct evtchn_bind_vcpu vcpu;

        qemu_build_assert(sizeof(vcpu) == 8);
        if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_vcpu_op(&vcpu);
        break;
    }
    case EVTCHNOP_reset: {
        struct evtchn_reset reset;

        qemu_build_assert(sizeof(reset) == 2);
        if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_reset_op(&reset);
        break;
    }
    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
int kvm_xen_soft_reset(void)
{
    CPUState *cpu;
    int err;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    err = xen_evtchn_soft_reset();
    if (err) {
        return err;
    }

    /*
     * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
     * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
     * deliver to the timer interrupt and treats that as 'disabled'.
     */
    err = xen_evtchn_set_callback_param(0);
    if (err) {
        return err;
    }

    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
    }

    err = xen_overlay_map_shinfo_page(INVALID_GFN);
    if (err) {
        return err;
    }

    return 0;
}
static int schedop_shutdown(CPUState *cs, uint64_t arg)
{
    struct sched_shutdown shutdown;
    int ret = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(shutdown) == 4);

    if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
        return -EFAULT;
    }

    switch (shutdown.reason) {
    case SHUTDOWN_crash:
        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
        qemu_system_guest_panicked(NULL);
        break;

    case SHUTDOWN_reboot:
        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        break;

    case SHUTDOWN_poweroff:
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;

    case SHUTDOWN_soft_reset:
        qemu_mutex_lock_iothread();
        ret = kvm_xen_soft_reset();
        qemu_mutex_unlock_iothread();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case SCHEDOP_shutdown:
        err = schedop_shutdown(cs, arg);
        break;

    case SCHEDOP_poll:
        /*
         * Linux will panic if this doesn't work. Just yield; it's not
         * worth overthinking it because with event channel handling
         * in KVM, the kernel will intercept this and it will never
         * reach QEMU anyway. The semantics of the hypercall explicitly
         * permit spurious wakeups.
         */
    case SCHEDOP_yield:
        sched_yield();
        err = 0;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    uint16_t code = exit->u.hcall.input;

    if (exit->u.hcall.cpl > 0) {
        exit->u.hcall.result = -EPERM;
        return true;
    }

    switch (code) {
    case __HYPERVISOR_sched_op:
        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
                                      exit->u.hcall.params[1]);
    case __HYPERVISOR_event_channel_op:
        return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_vcpu_op:
        return kvm_xen_hcall_vcpu_op(exit, cpu,
                                     exit->u.hcall.params[0],
                                     exit->u.hcall.params[1],
                                     exit->u.hcall.params[2]);
    case __HYPERVISOR_hvm_op:
        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
                                    exit->u.hcall.params[1]);
    case __HYPERVISOR_memory_op:
        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_xen_version:
        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                         exit->u.hcall.params[1]);
    default:
        return false;
    }
}
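
/*
 * For reference: in the Xen hypercall ABI the hypercall number is passed
 * in RAX and the arguments (for 64-bit guests) in RDI, RSI, RDX, R10, R8
 * and R9. KVM decodes those into exit->u.hcall.input and
 * exit->u.hcall.params[] before bouncing the call out to userspace as a
 * KVM_EXIT_XEN_HCALL.
 */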
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    if (exit->type != KVM_EXIT_XEN_HCALL) {
        return -1;
    }

    /*
     * The kernel latches the guest 32/64 mode when the MSR is used to fill
     * the hypercall page. So if we see a hypercall in a mode that doesn't
     * match our own idea of the guest mode, fetch the kernel's idea of the
     * "long mode" to remain in sync.
     */
    if (exit->u.hcall.longmode != xen_is_long_mode()) {
        xen_sync_long_mode();
    }

    if (!do_kvm_xen_handle_exit(cpu, exit)) {
        /*
         * Some hypercalls will be deliberately "implemented" by returning
         * -ENOSYS. This case is for hypercalls which are unexpected.
         */
        exit->u.hcall.result = -ENOSYS;
        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
                      PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
                      (uint64_t)exit->u.hcall.input,
                      (uint64_t)exit->u.hcall.params[0],
                      (uint64_t)exit->u.hcall.params[1],
                      (uint64_t)exit->u.hcall.params[2]);
    }

    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
                            exit->u.hcall.input, exit->u.hcall.params[0],
                            exit->u.hcall.params[1], exit->u.hcall.params[2],
                            exit->u.hcall.result);

    return 0;
}
uint16_t kvm_xen_get_gnttab_max_frames(void)
{
    KVMState *s = KVM_STATE(current_accel());

    return s->xen_gnttab_max_frames;
}
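
/*
 * The remaining two functions are the state save/restore glue:
 * kvm_put_xen_state() pushes the per-vCPU Xen state back into the kernel
 * and kvm_get_xen_state() pulls it out, on the usual KVM vCPU put/get
 * paths (e.g. around migration and reset).
 */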
int kvm_put_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }

    if (gpa != INVALID_GPA) {
        ret = set_vcpu_info(cs, gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_time_info_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_runstate_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
        return 0;
    }

    if (env->xen_vcpu_callback_vector) {
        ret = kvm_xen_set_vcpu_callback_vector(cs);
        if (ret) {
            return ret;
        }
    }

    if (env->xen_virq[VIRQ_TIMER]) {
        ret = kvm_xen_set_vcpu_timer(cs);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
int kvm_get_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    /*
     * The kernel does not mark vcpu_info as dirty when it delivers interrupts
     * to it. It's up to userspace to *assume* that any page shared thus is
     * always considered dirty. The shared_info page is different since it's
     * an overlay and migrated separately anyway.
     */
    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }
    if (gpa != INVALID_GPA) {
        MemoryRegionSection mrs = memory_region_find(get_system_memory(),
                                                     gpa,
                                                     sizeof(struct vcpu_info));
        if (mrs.mr &&
            !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
            memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
                                    sizeof(struct vcpu_info));
        }
    }

    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
        return 0;
    }

    /*
     * If the kernel is accelerating timers, read out the current value of the
     * singleshot timer deadline.
     */
    if (env->xen_virq[VIRQ_TIMER]) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        };
        ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
        if (ret < 0) {
            return ret;
        }
        env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
    }

    return 0;
}