target/i386/kvm/xen-emu.c

   1 /*
   2  * Xen HVM emulation support in KVM
   3  *
   4  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
   5  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   6  *
   7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
   8  * See the COPYING file in the top-level directory.
   9  *
  10  */
  11
  12 #include "qemu/osdep.h"
  13 #include "qemu/log.h"
  14 #include "qemu/main-loop.h"
  15 #include "hw/xen/xen.h"
  16 #include "sysemu/kvm_int.h"
  17 #include "sysemu/kvm_xen.h"
  18 #include "kvm/kvm_i386.h"
  19 #include "exec/address-spaces.h"
  20 #include "xen-emu.h"
  21 #include "trace.h"
  22 #include "sysemu/runstate.h"
  23
  24 #include "hw/pci/msi.h"
  25 #include "hw/i386/apic-msidef.h"
  26 #include "hw/i386/kvm/xen_overlay.h"
  27 #include "hw/i386/kvm/xen_evtchn.h"
  28 #include "hw/i386/kvm/xen_gnttab.h"
  29
  30 #include "hw/xen/interface/version.h"
  31 #include "hw/xen/interface/sched.h"
  32 #include "hw/xen/interface/memory.h"
  33 #include "hw/xen/interface/hvm/hvm_op.h"
  34 #include "hw/xen/interface/hvm/params.h"
  35 #include "hw/xen/interface/vcpu.h"
  36 #include "hw/xen/interface/event_channel.h"
  37 #include "hw/xen/interface/grant_table.h"
  38
  39 #include "xen-compat.h"
  40
  41 #ifdef TARGET_X86_64
  42 #define hypercall_compat32(longmode) (!(longmode))
  43 #else
  44 #define hypercall_compat32(longmode) (false)
  45 #endif
  46
  47 static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
  48                            size_t *len, bool is_write)
  49 {
  50         struct kvm_translation tr = {
  51             .linear_address = gva,
  52         };
  53
  54         if (len) {
  55             *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
  56         }
  57
  58         if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
  59             (is_write && !tr.writeable)) {
  60             return false;
  61         }
  62         *gpa = tr.physical_address;
  63         return true;
  64 }
  65
  66 static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
  67                       bool is_write)
  68 {
  69     uint8_t *buf = (uint8_t *)_buf;
  70     uint64_t gpa;
  71     size_t len;
  72
  73     while (sz) {
  74         if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
  75             return -EFAULT;
  76         }
  77         if (len > sz) {
  78             len = sz;
  79         }
  80
  81         cpu_physical_memory_rw(gpa, buf, len, is_write);
  82
  83         buf += len;
  84         sz -= len;
  85         gva += len;
  86     }
  87
  88     return 0;
  89 }
  90
  91 static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
  92                                     size_t sz)
  93 {
  94     return kvm_gva_rw(cs, gva, buf, sz, false);
  95 }
  96
  97 static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
  98                                   size_t sz)
  99 {
 100     return kvm_gva_rw(cs, gva, buf, sz, true);
 101 }
 102
 103 int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
 104 {
 105     const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
 106         KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
 107     struct kvm_xen_hvm_config cfg = {
 108         .msr = hypercall_msr,
 109         .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
 110     };
 111     int xen_caps, ret;
 112
 113     xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
 114     if (required_caps & ~xen_caps) {
 115         error_report("kvm: Xen HVM guest support not present or insufficient");
 116         return -ENOSYS;
 117     }
 118
 119     if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
 120         struct kvm_xen_hvm_attr ha = {
 121             .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
 122             .u.xen_version = s->xen_version,
 123         };
 124         (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
 125
 126         cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
 127     }
 128
 129     ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
 130     if (ret < 0) {
 131         error_report("kvm: Failed to enable Xen HVM support: %s",
 132                      strerror(-ret));
 133         return ret;
 134     }
 135
 136     /* If called a second time, don't repeat the rest of the setup. */
 137     if (s->xen_caps) {
 138         return 0;
 139     }
 140
 141     /*
 142      * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
 143      * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
 144      *
 145      * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
 146      * such things to be polled at precisely the right time. We *could* do
 147      * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
 148      * the moment the IRQ is acked, and see if it should be reasserted.
 149      *
 150      * But the in-kernel irqchip is deprecated, so we're unlikely to add
 151      * that support in the kernel. Insist on using the split irqchip mode
 152      * instead.
 153      *
 154      * This leaves us polling for the level going low in QEMU, which lacks
 155      * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
 156      * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
 157      * the device (for which it has to unmap the device and trap access, for
 158      * some period after an IRQ!!). In the Xen case, we do it on exit from
 159      * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
 160      * Which is kind of icky, but less so than the VFIO one. I may fix them
 161      * both later...
 162      */
 163     if (!kvm_kernel_irqchip_split()) {
 164         error_report("kvm: Xen support requires kernel-irqchip=split");
 165         return -EINVAL;
 166     }
 167
 168     s->xen_caps = xen_caps;
 169     return 0;
 170 }
 171
 172 int kvm_xen_init_vcpu(CPUState *cs)
 173 {
 174     X86CPU *cpu = X86_CPU(cs);
 175     CPUX86State *env = &cpu->env;
 176     int err;
 177
 178     /*
 179      * The kernel needs to know the Xen/ACPI vCPU ID because that's
 180      * what the guest uses in hypercalls such as timers. It doesn't
 181      * match the APIC ID which is generally used for talking to the
 182      * kernel about vCPUs. And if vCPU threads race with creating
 183      * their KVM vCPUs out of order, it doesn't necessarily match
 184      * with the kernel's internal vCPU indices either.
 185      */
 186     if (kvm_xen_has_cap(EVTCHN_SEND)) {
 187         struct kvm_xen_vcpu_attr va = {
 188             .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
 189             .u.vcpu_id = cs->cpu_index,
 190         };
 191         err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
 192         if (err) {
 193             error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
 194                          strerror(-err));
 195             return err;
 196         }
 197     }
 198
 199     env->xen_vcpu_info_gpa = INVALID_GPA;
 200     env->xen_vcpu_info_default_gpa = INVALID_GPA;
 201     env->xen_vcpu_time_info_gpa = INVALID_GPA;
 202     env->xen_vcpu_runstate_gpa = INVALID_GPA;
 203
 204     return 0;
 205 }
 206
 207 uint32_t kvm_xen_get_caps(void)
 208 {
 209     return kvm_state->xen_caps;
 210 }
 211
 212 static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
 213                                      int cmd, uint64_t arg)
 214 {
 215     int err = 0;
 216
 217     switch (cmd) {
 218     case XENVER_get_features: {
 219         struct xen_feature_info fi;
 220
 221         /* No need for 32/64 compat handling */
 222         qemu_build_assert(sizeof(fi) == 8);
 223
 224         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
 225         if (err) {
 226             break;
 227         }
 228
 229         fi.submap = 0;
 230         if (fi.submap_idx == 0) {
 231             fi.submap |= 1 << XENFEAT_writable_page_tables |
 232                          1 << XENFEAT_writable_descriptor_tables |
 233                          1 << XENFEAT_auto_translated_physmap |
 234                          1 << XENFEAT_supervisor_mode_kernel |
 235                          1 << XENFEAT_hvm_callback_vector;
 236         }
 237
 238         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
 239         break;
 240     }
 241
 242     default:
 243         return false;
 244     }
 245
 246     exit->u.hcall.result = err;
 247     return true;
 248 }
 249
 250 static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
 251 {
 252     struct kvm_xen_vcpu_attr xhsi;
 253
 254     xhsi.type = type;
 255     xhsi.u.gpa = gpa;
 256
 257     trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
 258
 259     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
 260 }
 261
 262 static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
 263 {
 264     uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
 265     struct kvm_xen_vcpu_attr xva;
 266
 267     xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
 268     xva.u.vector = vector;
 269
 270     trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
 271
 272     return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
 273 }
 274
 275 static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
 276 {
 277     X86CPU *cpu = X86_CPU(cs);
 278     CPUX86State *env = &cpu->env;
 279
 280     env->xen_vcpu_callback_vector = data.host_int;
 281
 282     if (kvm_xen_has_cap(EVTCHN_SEND)) {
 283         kvm_xen_set_vcpu_callback_vector(cs);
 284     }
 285 }
 286
 287 static int set_vcpu_info(CPUState *cs, uint64_t gpa)
 288 {
 289     X86CPU *cpu = X86_CPU(cs);
 290     CPUX86State *env = &cpu->env;
 291     MemoryRegionSection mrs = { .mr = NULL };
 292     void *vcpu_info_hva = NULL;
 293     int ret;
 294
 295     ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
 296     if (ret || gpa == INVALID_GPA) {
 297         goto out;
 298     }
 299
 300     mrs = memory_region_find(get_system_memory(), gpa,
 301                              sizeof(struct vcpu_info));
 302     if (mrs.mr && mrs.mr->ram_block &&
 303         !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
 304         vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
 305                                          mrs.offset_within_region);
 306     }
 307     if (!vcpu_info_hva) {
 308         if (mrs.mr) {
 309             memory_region_unref(mrs.mr);
 310             mrs.mr = NULL;
 311         }
 312         ret = -EINVAL;
 313     }
 314
 315  out:
 316     if (env->xen_vcpu_info_mr) {
 317         memory_region_unref(env->xen_vcpu_info_mr);
 318     }
 319     env->xen_vcpu_info_hva = vcpu_info_hva;
 320     env->xen_vcpu_info_mr = mrs.mr;
 321     return ret;
 322 }
 323
 324 static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
 325 {
 326     X86CPU *cpu = X86_CPU(cs);
 327     CPUX86State *env = &cpu->env;
 328
 329     env->xen_vcpu_info_default_gpa = data.host_ulong;
 330
 331     /* Changing the default does nothing if a vcpu_info was explicitly set. */
 332     if (env->xen_vcpu_info_gpa == INVALID_GPA) {
 333         set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
 334     }
 335 }
 336
 337 static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
 338 {
 339     X86CPU *cpu = X86_CPU(cs);
 340     CPUX86State *env = &cpu->env;
 341
 342     env->xen_vcpu_info_gpa = data.host_ulong;
 343
 344     set_vcpu_info(cs, env->xen_vcpu_info_gpa);
 345 }
 346
 347 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
 348 {
 349     CPUState *cs = qemu_get_cpu(vcpu_id);
 350     if (!cs) {
 351         return NULL;
 352     }
 353
 354     return X86_CPU(cs)->env.xen_vcpu_info_hva;
 355 }
 356
 357 void kvm_xen_maybe_deassert_callback(CPUState *cs)
 358 {
 359     CPUX86State *env = &X86_CPU(cs)->env;
 360     struct vcpu_info *vi = env->xen_vcpu_info_hva;
 361     if (!vi) {
 362         return;
 363     }
 364
 365     /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
 366     if (!vi->evtchn_upcall_pending) {
 367         qemu_mutex_lock_iothread();
 368         /*
 369          * Check again now we have the lock, because it may have been
 370          * asserted in the interim. And we don't want to take the lock
 371          * every time because this is a fast path.
 372          */
 373         if (!vi->evtchn_upcall_pending) {
 374             X86_CPU(cs)->env.xen_callback_asserted = false;
 375             xen_evtchn_set_callback_level(0);
 376         }
 377         qemu_mutex_unlock_iothread();
 378     }
 379 }
 380
 381 void kvm_xen_set_callback_asserted(void)
 382 {
 383     CPUState *cs = qemu_get_cpu(0);
 384
 385     if (cs) {
 386         X86_CPU(cs)->env.xen_callback_asserted = true;
 387     }
 388 }
 389
 390 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
 391 {
 392     CPUState *cs = qemu_get_cpu(vcpu_id);
 393     uint8_t vector;
 394
 395     if (!cs) {
 396         return;
 397     }
 398
 399     vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
 400     if (vector) {
 401         /*
 402          * The per-vCPU callback vector injected via lapic. Just
 403          * deliver it as an MSI.
 404          */
 405         MSIMessage msg = {
 406             .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
 407             .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
 408         };
 409         kvm_irqchip_send_msi(kvm_state, msg);
 410         return;
 411     }
 412
 413     switch (type) {
 414     case HVM_PARAM_CALLBACK_TYPE_VECTOR:
 415         /*
 416          * If the evtchn_upcall_pending field in the vcpu_info is set, then
 417          * KVM will automatically deliver the vector on entering the vCPU
 418          * so all we have to do is kick it out.
 419          */
 420         qemu_cpu_kick(cs);
 421         break;
 422
 423     case HVM_PARAM_CALLBACK_TYPE_GSI:
 424     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
 425         if (vcpu_id == 0) {
 426             xen_evtchn_set_callback_level(1);
 427         }
 428         break;
 429     }
 430 }
 431
 432 static int kvm_xen_set_vcpu_timer(CPUState *cs)
 433 {
 434     X86CPU *cpu = X86_CPU(cs);
 435     CPUX86State *env = &cpu->env;
 436
 437     struct kvm_xen_vcpu_attr va = {
 438         .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
 439         .u.timer.port = env->xen_virq[VIRQ_TIMER],
 440         .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
 441         .u.timer.expires_ns = env->xen_singleshot_timer_ns,
 442     };
 443
 444     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
 445 }
 446
 447 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
 448 {
 449     kvm_xen_set_vcpu_timer(cs);
 450 }
 451
 452 int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
 453 {
 454     CPUState *cs = qemu_get_cpu(vcpu_id);
 455
 456     if (!cs) {
 457         return -ENOENT;
 458     }
 459
 460     /* cpu.h doesn't include the actual Xen header. */
 461     qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
 462
 463     if (virq >= NR_VIRQS) {
 464         return -EINVAL;
 465     }
 466
 467     if (port && X86_CPU(cs)->env.xen_virq[virq]) {
 468         return -EEXIST;
 469     }
 470
 471     X86_CPU(cs)->env.xen_virq[virq] = port;
 472     if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
 473         async_run_on_cpu(cs, do_set_vcpu_timer_virq,
 474                          RUN_ON_CPU_HOST_INT(port));
 475     }
 476     return 0;
 477 }
 478
 479 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
 480 {
 481     X86CPU *cpu = X86_CPU(cs);
 482     CPUX86State *env = &cpu->env;
 483
 484     env->xen_vcpu_time_info_gpa = data.host_ulong;
 485
 486     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
 487                           env->xen_vcpu_time_info_gpa);
 488 }
 489
 490 static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
 491 {
 492     X86CPU *cpu = X86_CPU(cs);
 493     CPUX86State *env = &cpu->env;
 494
 495     env->xen_vcpu_runstate_gpa = data.host_ulong;
 496
 497     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
 498                           env->xen_vcpu_runstate_gpa);
 499 }
 500
 501 static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
 502 {
 503     X86CPU *cpu = X86_CPU(cs);
 504     CPUX86State *env = &cpu->env;
 505
 506     env->xen_vcpu_info_gpa = INVALID_GPA;
 507     env->xen_vcpu_info_default_gpa = INVALID_GPA;
 508     env->xen_vcpu_time_info_gpa = INVALID_GPA;
 509     env->xen_vcpu_runstate_gpa = INVALID_GPA;
 510     env->xen_vcpu_callback_vector = 0;
 511     env->xen_singleshot_timer_ns = 0;
 512     memset(env->xen_virq, 0, sizeof(env->xen_virq));
 513
 514     set_vcpu_info(cs, INVALID_GPA);
 515     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
 516                           INVALID_GPA);
 517     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
 518                           INVALID_GPA);
 519     if (kvm_xen_has_cap(EVTCHN_SEND)) {
 520         kvm_xen_set_vcpu_callback_vector(cs);
 521         kvm_xen_set_vcpu_timer(cs);
 522     }
 523
 524 }
 525
 526 static int xen_set_shared_info(uint64_t gfn)
 527 {
 528     uint64_t gpa = gfn << TARGET_PAGE_BITS;
 529     int i, err;
 530
 531     QEMU_IOTHREAD_LOCK_GUARD();
 532
 533     /*
 534      * The xen_overlay device tells KVM about it too, since it had to
 535      * do that on migration load anyway (unless we're going to jump
 536      * through lots of hoops to maintain the fiction that this isn't
 537      * KVM-specific.
 538      */
 539     err = xen_overlay_map_shinfo_page(gpa);
 540     if (err) {
 541             return err;
 542     }
 543
 544     trace_kvm_xen_set_shared_info(gfn);
 545
 546     for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
 547         CPUState *cpu = qemu_get_cpu(i);
 548         if (cpu) {
 549             async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
 550                              RUN_ON_CPU_HOST_ULONG(gpa));
 551         }
 552         gpa += sizeof(vcpu_info_t);
 553     }
 554
 555     return err;
 556 }
 557
 558 static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
 559 {
 560     switch (space) {
 561     case XENMAPSPACE_shared_info:
 562         if (idx > 0) {
 563             return -EINVAL;
 564         }
 565         return xen_set_shared_info(gfn);
 566
 567     case XENMAPSPACE_grant_table:
 568         return xen_gnttab_map_page(idx, gfn);
 569
 570     case XENMAPSPACE_gmfn:
 571     case XENMAPSPACE_gmfn_range:
 572         return -ENOTSUP;
 573
 574     case XENMAPSPACE_gmfn_foreign:
 575     case XENMAPSPACE_dev_mmio:
 576         return -EPERM;
 577
 578     default:
 579         return -EINVAL;
 580     }
 581 }
 582
 583 static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
 584                              uint64_t arg)
 585 {
 586     struct xen_add_to_physmap xatp;
 587     CPUState *cs = CPU(cpu);
 588
 589     if (hypercall_compat32(exit->u.hcall.longmode)) {
 590         struct compat_xen_add_to_physmap xatp32;
 591
 592         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
 593         if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
 594             return -EFAULT;
 595         }
 596         xatp.domid = xatp32.domid;
 597         xatp.size = xatp32.size;
 598         xatp.space = xatp32.space;
 599         xatp.idx = xatp32.idx;
 600         xatp.gpfn = xatp32.gpfn;
 601     } else {
 602         if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
 603             return -EFAULT;
 604         }
 605     }
 606
 607     if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
 608         return -ESRCH;
 609     }
 610
 611     return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
 612 }
 613
 614 static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
 615                                    uint64_t arg)
 616 {
 617     struct xen_add_to_physmap_batch xatpb;
 618     unsigned long idxs_gva, gpfns_gva, errs_gva;
 619     CPUState *cs = CPU(cpu);
 620     size_t op_sz;
 621
 622     if (hypercall_compat32(exit->u.hcall.longmode)) {
 623         struct compat_xen_add_to_physmap_batch xatpb32;
 624
 625         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
 626         if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
 627             return -EFAULT;
 628         }
 629         xatpb.domid = xatpb32.domid;
 630         xatpb.space = xatpb32.space;
 631         xatpb.size = xatpb32.size;
 632
 633         idxs_gva = xatpb32.idxs.c;
 634         gpfns_gva = xatpb32.gpfns.c;
 635         errs_gva = xatpb32.errs.c;
 636         op_sz = sizeof(uint32_t);
 637     } else {
 638         if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
 639             return -EFAULT;
 640         }
 641         op_sz = sizeof(unsigned long);
 642         idxs_gva = (unsigned long)xatpb.idxs.p;
 643         gpfns_gva = (unsigned long)xatpb.gpfns.p;
 644         errs_gva = (unsigned long)xatpb.errs.p;
 645     }
 646
 647     if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
 648         return -ESRCH;
 649     }
 650
 651     /* Explicitly invalid for the batch op. Not that we implement it anyway. */
 652     if (xatpb.space == XENMAPSPACE_gmfn_range) {
 653         return -EINVAL;
 654     }
 655
 656     while (xatpb.size--) {
 657         unsigned long idx = 0;
 658         unsigned long gpfn = 0;
 659         int err;
 660
 661         /* For 32-bit compat this only copies the low 32 bits of each */
 662         if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
 663             kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
 664             return -EFAULT;
 665         }
 666         idxs_gva += op_sz;
 667         gpfns_gva += op_sz;
 668
 669         err = add_to_physmap_one(xatpb.space, idx, gpfn);
 670
 671         if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
 672             return -EFAULT;
 673         }
 674         errs_gva += sizeof(err);
 675     }
 676     return 0;
 677 }
 678
 679 static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
 680                                    int cmd, uint64_t arg)
 681 {
 682     int err;
 683
 684     switch (cmd) {
 685     case XENMEM_add_to_physmap:
 686         err = do_add_to_physmap(exit, cpu, arg);
 687         break;
 688
 689     case XENMEM_add_to_physmap_batch:
 690         err = do_add_to_physmap_batch(exit, cpu, arg);
 691         break;
 692
 693     default:
 694         return false;
 695     }
 696
 697     exit->u.hcall.result = err;
 698     return true;
 699 }
 700
 701 static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
 702                              uint64_t arg)
 703 {
 704     CPUState *cs = CPU(cpu);
 705     struct xen_hvm_param hp;
 706     int err = 0;
 707
 708     /* No need for 32/64 compat handling */
 709     qemu_build_assert(sizeof(hp) == 16);
 710
 711     if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
 712         err = -EFAULT;
 713         goto out;
 714     }
 715
 716     if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
 717         err = -ESRCH;
 718         goto out;
 719     }
 720
 721     switch (hp.index) {
 722     case HVM_PARAM_CALLBACK_IRQ:
 723         qemu_mutex_lock_iothread();
 724         err = xen_evtchn_set_callback_param(hp.value);
 725         qemu_mutex_unlock_iothread();
 726         xen_set_long_mode(exit->u.hcall.longmode);
 727         break;
 728     default:
 729         return false;
 730     }
 731
 732 out:
 733     exit->u.hcall.result = err;
 734     return true;
 735 }
 736
 737 static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
 738                                               X86CPU *cpu, uint64_t arg)
 739 {
 740     struct xen_hvm_evtchn_upcall_vector up;
 741     CPUState *target_cs;
 742
 743     /* No need for 32/64 compat handling */
 744     qemu_build_assert(sizeof(up) == 8);
 745
 746     if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
 747         return -EFAULT;
 748     }
 749
 750     if (up.vector < 0x10) {
 751         return -EINVAL;
 752     }
 753
 754     target_cs = qemu_get_cpu(up.vcpu);
 755     if (!target_cs) {
 756         return -EINVAL;
 757     }
 758
 759     async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
 760                      RUN_ON_CPU_HOST_INT(up.vector));
 761     return 0;
 762 }
 763
 764 static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
 765                                  int cmd, uint64_t arg)
 766 {
 767     int ret = -ENOSYS;
 768     switch (cmd) {
 769     case HVMOP_set_evtchn_upcall_vector:
 770         ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
 771                                                  exit->u.hcall.params[0]);
 772         break;
 773
 774     case HVMOP_pagetable_dying:
 775         ret = -ENOSYS;
 776         break;
 777
 778     case HVMOP_set_param:
 779         return handle_set_param(exit, cpu, arg);
 780
 781     default:
 782         return false;
 783     }
 784
 785     exit->u.hcall.result = ret;
 786     return true;
 787 }
 788
 789 static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
 790                                      uint64_t arg)
 791 {
 792     struct vcpu_register_vcpu_info rvi;
 793     uint64_t gpa;
 794
 795     /* No need for 32/64 compat handling */
 796     qemu_build_assert(sizeof(rvi) == 16);
 797     qemu_build_assert(sizeof(struct vcpu_info) == 64);
 798
 799     if (!target) {
 800         return -ENOENT;
 801     }
 802
 803     if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
 804         return -EFAULT;
 805     }
 806
 807     if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
 808         return -EINVAL;
 809     }
 810
 811     gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
 812     async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
 813     return 0;
 814 }
 815
 816 static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
 817                                           uint64_t arg)
 818 {
 819     struct vcpu_register_time_memory_area tma;
 820     uint64_t gpa;
 821     size_t len;
 822
 823     /* No need for 32/64 compat handling */
 824     qemu_build_assert(sizeof(tma) == 8);
 825     qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
 826
 827     if (!target) {
 828         return -ENOENT;
 829     }
 830
 831     if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
 832         return -EFAULT;
 833     }
 834
 835     /*
 836      * Xen actually uses the GVA and does the translation through the guest
 837      * page tables each time. But Linux/KVM uses the GPA, on the assumption
 838      * that guests only ever use *global* addresses (kernel virtual addresses)
 839      * for it. If Linux is changed to redo the GVA→GPA translation each time,
 840      * it will offer a new vCPU attribute for that, and we'll use it instead.
 841      */
 842     if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
 843         len < sizeof(struct vcpu_time_info)) {
 844         return -EFAULT;
 845     }
 846
 847     async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
 848                      RUN_ON_CPU_HOST_ULONG(gpa));
 849     return 0;
 850 }
 851
 852 static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
 853                                          uint64_t arg)
 854 {
 855     struct vcpu_register_runstate_memory_area rma;
 856     uint64_t gpa;
 857     size_t len;
 858
 859     /* No need for 32/64 compat handling */
 860     qemu_build_assert(sizeof(rma) == 8);
 861     /* The runstate area actually does change size, but Linux copes. */
 862
 863     if (!target) {
 864         return -ENOENT;
 865     }
 866
 867     if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
 868         return -EFAULT;
 869     }
 870
 871     /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
 872     if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
 873         return -EFAULT;
 874     }
 875
 876     async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
 877                      RUN_ON_CPU_HOST_ULONG(gpa));
 878     return 0;
 879 }
 880
 881 static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
 882                                   int cmd, int vcpu_id, uint64_t arg)
 883 {
 884     CPUState *dest = qemu_get_cpu(vcpu_id);
 885     CPUState *cs = CPU(cpu);
 886     int err;
 887
 888     switch (cmd) {
 889     case VCPUOP_register_runstate_memory_area:
 890         err = vcpuop_register_runstate_info(cs, dest, arg);
 891         break;
 892     case VCPUOP_register_vcpu_time_memory_area:
 893         err = vcpuop_register_vcpu_time_info(cs, dest, arg);
 894         break;
 895     case VCPUOP_register_vcpu_info:
 896         err = vcpuop_register_vcpu_info(cs, dest, arg);
 897         break;
 898
 899     default:
 900         return false;
 901     }
 902
 903     exit->u.hcall.result = err;
 904     return true;
 905 }
 906
 907 static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
 908                                     int cmd, uint64_t arg)
 909 {
 910     CPUState *cs = CPU(cpu);
 911     int err = -ENOSYS;
 912
 913     switch (cmd) {
 914     case EVTCHNOP_init_control:
 915     case EVTCHNOP_expand_array:
 916     case EVTCHNOP_set_priority:
 917         /* We do not support FIFO channels at this point */
 918         err = -ENOSYS;
 919         break;
 920
 921     case EVTCHNOP_status: {
 922         struct evtchn_status status;
 923
 924         qemu_build_assert(sizeof(status) == 24);
 925         if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
 926             err = -EFAULT;
 927             break;
 928         }
 929
 930         err = xen_evtchn_status_op(&status);
 931         if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
 932             err = -EFAULT;
 933         }
 934         break;
 935     }
 936     case EVTCHNOP_close: {
 937         struct evtchn_close close;
 938
 939         qemu_build_assert(sizeof(close) == 4);
 940         if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
 941             err = -EFAULT;
 942             break;
 943         }
 944
 945         err = xen_evtchn_close_op(&close);
 946         break;
 947     }
 948     case EVTCHNOP_unmask: {
 949         struct evtchn_unmask unmask;
 950
 951         qemu_build_assert(sizeof(unmask) == 4);
 952         if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
 953             err = -EFAULT;
 954             break;
 955         }
 956
 957         err = xen_evtchn_unmask_op(&unmask);
 958         break;
 959     }
 960     case EVTCHNOP_bind_virq: {
 961         struct evtchn_bind_virq virq;
 962
 963         qemu_build_assert(sizeof(virq) == 12);
 964         if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
 965             err = -EFAULT;
 966             break;
 967         }
 968
 969         err = xen_evtchn_bind_virq_op(&virq);
 970         if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
 971             err = -EFAULT;
 972         }
 973         break;
 974     }
 975     case EVTCHNOP_bind_ipi: {
 976         struct evtchn_bind_ipi ipi;
 977
 978         qemu_build_assert(sizeof(ipi) == 8);
 979         if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
 980             err = -EFAULT;
 981             break;
 982         }
 983
 984         err = xen_evtchn_bind_ipi_op(&ipi);
 985         if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
 986             err = -EFAULT;
 987         }
 988         break;
 989     }
 990     case EVTCHNOP_send: {
 991         struct evtchn_send send;
 992
 993         qemu_build_assert(sizeof(send) == 4);
 994         if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
 995             err = -EFAULT;
 996             break;
 997         }
 998
 999         err = xen_evtchn_send_op(&send);
1000         break;
1001     }
1002     case EVTCHNOP_alloc_unbound: {
1003         struct evtchn_alloc_unbound alloc;
1004
1005         qemu_build_assert(sizeof(alloc) == 8);
1006         if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1007             err = -EFAULT;
1008             break;
1009         }
1010
1011         err = xen_evtchn_alloc_unbound_op(&alloc);
1012         if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1013             err = -EFAULT;
1014         }
1015         break;
1016     }
1017     case EVTCHNOP_bind_interdomain: {
1018         struct evtchn_bind_interdomain interdomain;
1019
1020         qemu_build_assert(sizeof(interdomain) == 12);
1021         if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1022             err = -EFAULT;
1023             break;
1024         }
1025
1026         err = xen_evtchn_bind_interdomain_op(&interdomain);
1027         if (!err &&
1028             kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1029             err = -EFAULT;
1030         }
1031         break;
1032     }
1033     case EVTCHNOP_bind_vcpu: {
1034         struct evtchn_bind_vcpu vcpu;
1035
1036         qemu_build_assert(sizeof(vcpu) == 8);
1037         if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1038             err = -EFAULT;
1039             break;
1040         }
1041
1042         err = xen_evtchn_bind_vcpu_op(&vcpu);
1043         break;
1044     }
1045     case EVTCHNOP_reset: {
1046         struct evtchn_reset reset;
1047
1048         qemu_build_assert(sizeof(reset) == 2);
1049         if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1050             err = -EFAULT;
1051             break;
1052         }
1053
1054         err = xen_evtchn_reset_op(&reset);
1055         break;
1056     }
1057     default:
1058         return false;
1059     }
1060
1061     exit->u.hcall.result = err;
1062     return true;
1063 }
1064
1065 int kvm_xen_soft_reset(void)
1066 {
1067     CPUState *cpu;
1068     int err;
1069
1070     assert(qemu_mutex_iothread_locked());
1071
1072     trace_kvm_xen_soft_reset();
1073
1074     err = xen_evtchn_soft_reset();
1075     if (err) {
1076         return err;
1077     }
1078
1079     /*
1080      * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1081      * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
1082      * to deliver to the timer interrupt and treats that as 'disabled'.
1083      */
1084     err = xen_evtchn_set_callback_param(0);
1085     if (err) {
1086         return err;
1087     }
1088
1089     CPU_FOREACH(cpu) {
1090         async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1091     }
1092
1093     err = xen_overlay_map_shinfo_page(INVALID_GFN);
1094     if (err) {
1095         return err;
1096     }
1097
1098     return 0;
1099 }
1100
1101 static int schedop_shutdown(CPUState *cs, uint64_t arg)
1102 {
1103     struct sched_shutdown shutdown;
1104     int ret = 0;
1105
1106     /* No need for 32/64 compat handling */
1107     qemu_build_assert(sizeof(shutdown) == 4);
1108
1109     if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1110         return -EFAULT;
1111     }
1112
1113     switch (shutdown.reason) {
1114     case SHUTDOWN_crash:
1115         cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1116         qemu_system_guest_panicked(NULL);
1117         break;
1118
1119     case SHUTDOWN_reboot:
1120         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1121         break;
1122
1123     case SHUTDOWN_poweroff:
1124         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1125         break;
1126
1127     case SHUTDOWN_soft_reset:
1128         qemu_mutex_lock_iothread();
1129         ret = kvm_xen_soft_reset();
1130         qemu_mutex_unlock_iothread();
1131         break;
1132
1133     default:
1134         ret = -EINVAL;
1135         break;
1136     }
1137
1138     return ret;
1139 }
1140
1141 static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1142                                    int cmd, uint64_t arg)
1143 {
1144     CPUState *cs = CPU(cpu);
1145     int err = -ENOSYS;
1146
1147     switch (cmd) {
1148     case SCHEDOP_shutdown:
1149         err = schedop_shutdown(cs, arg);
1150         break;
1151
1152     case SCHEDOP_poll:
1153         /*
1154          * Linux will panic if this doesn't work. Just yield; it's not
1155          * worth overthinking it because with event channel handling
1156          * in KVM, the kernel will intercept this and it will never
1157          * reach QEMU anyway. The semantics of the hypercall explicltly
1158          * permit spurious wakeups.
1159          */
1160     case SCHEDOP_yield:
1161         sched_yield();
1162         err = 0;
1163         break;
1164
1165     default:
1166         return false;
1167     }
1168
1169     exit->u.hcall.result = err;
1170     return true;
1171 }
1172
1173 static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1174                                     int cmd, uint64_t arg, int count)
1175 {
1176     CPUState *cs = CPU(cpu);
1177     int err;
1178
1179     switch (cmd) {
1180     case GNTTABOP_set_version: {
1181         struct gnttab_set_version set;
1182
1183         qemu_build_assert(sizeof(set) == 4);
1184         if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1185             err = -EFAULT;
1186             break;
1187         }
1188
1189         err = xen_gnttab_set_version_op(&set);
1190         if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1191             err = -EFAULT;
1192         }
1193         break;
1194     }
1195     case GNTTABOP_get_version: {
1196         struct gnttab_get_version get;
1197
1198         qemu_build_assert(sizeof(get) == 8);
1199         if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1200             err = -EFAULT;
1201             break;
1202         }
1203
1204         err = xen_gnttab_get_version_op(&get);
1205         if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1206             err = -EFAULT;
1207         }
1208         break;
1209     }
1210     case GNTTABOP_query_size: {
1211         struct gnttab_query_size size;
1212
1213         qemu_build_assert(sizeof(size) == 16);
1214         if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1215             err = -EFAULT;
1216             break;
1217         }
1218
1219         err = xen_gnttab_query_size_op(&size);
1220         if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1221             err = -EFAULT;
1222         }
1223         break;
1224     }
1225     case GNTTABOP_setup_table:
1226     case GNTTABOP_copy:
1227     case GNTTABOP_map_grant_ref:
1228     case GNTTABOP_unmap_grant_ref:
1229     case GNTTABOP_swap_grant_ref:
1230         return false;
1231
1232     default:
1233         /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1234         err = -ENOSYS;
1235         break;
1236     }
1237
1238     exit->u.hcall.result = err;
1239     return true;
1240 }
1241
1242 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1243 {
1244     uint16_t code = exit->u.hcall.input;
1245
1246     if (exit->u.hcall.cpl > 0) {
1247         exit->u.hcall.result = -EPERM;
1248         return true;
1249     }
1250
1251     switch (code) {
1252     case __HYPERVISOR_grant_table_op:
1253         return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1254                                        exit->u.hcall.params[1],
1255                                        exit->u.hcall.params[2]);
1256     case __HYPERVISOR_sched_op:
1257         return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1258                                       exit->u.hcall.params[1]);
1259     case __HYPERVISOR_event_channel_op:
1260         return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
1261                                        exit->u.hcall.params[1]);
1262     case __HYPERVISOR_vcpu_op:
1263         return kvm_xen_hcall_vcpu_op(exit, cpu,
1264                                      exit->u.hcall.params[0],
1265                                      exit->u.hcall.params[1],
1266                                      exit->u.hcall.params[2]);
1267     case __HYPERVISOR_hvm_op:
1268         return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1269                                     exit->u.hcall.params[1]);
1270     case __HYPERVISOR_memory_op:
1271         return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1272                                        exit->u.hcall.params[1]);
1273     case __HYPERVISOR_xen_version:
1274         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1275                                          exit->u.hcall.params[1]);
1276     default:
1277         return false;
1278     }
1279 }
1280
1281 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1282 {
1283     if (exit->type != KVM_EXIT_XEN_HCALL) {
1284         return -1;
1285     }
1286
1287     /*
1288      * The kernel latches the guest 32/64 mode when the MSR is used to fill
1289      * the hypercall page. So if we see a hypercall in a mode that doesn't
1290      * match our own idea of the guest mode, fetch the kernel's idea of the
1291      * "long mode" to remain in sync.
1292      */
1293     if (exit->u.hcall.longmode != xen_is_long_mode()) {
1294         xen_sync_long_mode();
1295     }
1296
1297     if (!do_kvm_xen_handle_exit(cpu, exit)) {
1298         /*
1299          * Some hypercalls will be deliberately "implemented" by returning
1300          * -ENOSYS. This case is for hypercalls which are unexpected.
1301          */
1302         exit->u.hcall.result = -ENOSYS;
1303         qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1304                       PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1305                       (uint64_t)exit->u.hcall.input,
1306                       (uint64_t)exit->u.hcall.params[0],
1307                       (uint64_t)exit->u.hcall.params[1],
1308                       (uint64_t)exit->u.hcall.params[2]);
1309     }
1310
1311     trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1312                             exit->u.hcall.input, exit->u.hcall.params[0],
1313                             exit->u.hcall.params[1], exit->u.hcall.params[2],
1314                             exit->u.hcall.result);
1315     return 0;
1316 }
1317
1318 uint16_t kvm_xen_get_gnttab_max_frames(void)
1319 {
1320     KVMState *s = KVM_STATE(current_accel());
1321     return s->xen_gnttab_max_frames;
1322 }
1323
1324 int kvm_put_xen_state(CPUState *cs)
1325 {
1326     X86CPU *cpu = X86_CPU(cs);
1327     CPUX86State *env = &cpu->env;
1328     uint64_t gpa;
1329     int ret;
1330
1331     gpa = env->xen_vcpu_info_gpa;
1332     if (gpa == INVALID_GPA) {
1333         gpa = env->xen_vcpu_info_default_gpa;
1334     }
1335
1336     if (gpa != INVALID_GPA) {
1337         ret = set_vcpu_info(cs, gpa);
1338         if (ret < 0) {
1339             return ret;
1340         }
1341     }
1342
1343     gpa = env->xen_vcpu_time_info_gpa;
1344     if (gpa != INVALID_GPA) {
1345         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1346                                     gpa);
1347         if (ret < 0) {
1348             return ret;
1349         }
1350     }
1351
1352     gpa = env->xen_vcpu_runstate_gpa;
1353     if (gpa != INVALID_GPA) {
1354         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1355                                     gpa);
1356         if (ret < 0) {
1357             return ret;
1358         }
1359     }
1360
1361     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1362         return 0;
1363     }
1364
1365     if (env->xen_vcpu_callback_vector) {
1366         ret = kvm_xen_set_vcpu_callback_vector(cs);
1367         if (ret < 0) {
1368             return ret;
1369         }
1370     }
1371
1372     if (env->xen_virq[VIRQ_TIMER]) {
1373         ret = kvm_xen_set_vcpu_timer(cs);
1374         if (ret < 0) {
1375             return ret;
1376         }
1377     }
1378     return 0;
1379 }
1380
1381 int kvm_get_xen_state(CPUState *cs)
1382 {
1383     X86CPU *cpu = X86_CPU(cs);
1384     CPUX86State *env = &cpu->env;
1385     uint64_t gpa;
1386     int ret;
1387
1388     /*
1389      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1390      * to it. It's up to userspace to *assume* that any page shared thus is
1391      * always considered dirty. The shared_info page is different since it's
1392      * an overlay and migrated separately anyway.
1393      */
1394     gpa = env->xen_vcpu_info_gpa;
1395     if (gpa == INVALID_GPA) {
1396         gpa = env->xen_vcpu_info_default_gpa;
1397     }
1398     if (gpa != INVALID_GPA) {
1399         MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1400                                                      gpa,
1401                                                      sizeof(struct vcpu_info));
1402         if (mrs.mr &&
1403             !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1404             memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1405                                     sizeof(struct vcpu_info));
1406         }
1407     }
1408
1409     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1410         return 0;
1411     }
1412
1413     /*
1414      * If the kernel is accelerating timers, read out the current value of the
1415      * singleshot timer deadline.
1416      */
1417     if (env->xen_virq[VIRQ_TIMER]) {
1418         struct kvm_xen_vcpu_attr va = {
1419             .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1420         };
1421         ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1422         if (ret < 0) {
1423             return ret;
1424         }
1425         env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1426     }
1427
1428     return 0;
1429 }