/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm_int.h"
#include "sysemu/kvm_xen.h"
#include "kvm/kvm_i386.h"
#include "exec/address-spaces.h"
#include "xen-emu.h"
#include "trace.h"
#include "sysemu/runstate.h"

#include "hw/i386/kvm/xen_overlay.h"

#include "hw/xen/interface/version.h"
#include "hw/xen/interface/sched.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/hvm_op.h"
#include "hw/xen/interface/vcpu.h"

#include "xen-compat.h"

/*
 * On 64-bit targets the guest may run in 32-bit mode and use the 32-bit
 * hypercall ABI; a 32-bit-only target never needs compat handling.
 */
#ifdef TARGET_X86_64
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif

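/*
 * Hypercall arguments are guest virtual addresses. The helpers below use
 * the KVM_TRANSLATE ioctl to walk the guest page tables one page at a
 * time, then access the resulting guest physical address, so no single
 * copy step ever crosses a page boundary.
 */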
static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
                           size_t *len, bool is_write)
{
    struct kvm_translation tr = {
        .linear_address = gva,
    };

    /* Remaining bytes within the page containing @gva */
    *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);

    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
        (is_write && !tr.writeable)) {
        return false;
    }

    *gpa = tr.physical_address;
    return true;
}

static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
                      bool is_write)
{
    uint8_t *buf = (uint8_t *)_buf;
    uint64_t gpa;
    size_t len;

    /* Translate and copy page by page, since the GVA range may not be
     * physically contiguous. */
    while (sz) {
        if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
            return -EFAULT;
        }
        if (len > sz) {
            len = sz;
        }

        cpu_physical_memory_rw(gpa, buf, len, is_write);

        buf += len;
        sz -= len;
        gva += len;
    }

    return 0;
}

static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}

static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}

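/*
 * Check the host kernel's Xen HVM capabilities, tell it which MSR the
 * guest will use to populate its hypercall page, and ask for hypercalls
 * to be intercepted and forwarded to userspace.
 */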
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        /* Setting the version attribute is best-effort; ignore failure. */
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    s->xen_caps = xen_caps;
    return 0;
}

int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;

    return 0;
}

uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}

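/*
 * XENVER_get_features tells the guest which Xen features are supported.
 * Only submap index 0 is populated; the bits set below broadly match
 * what Xen itself would report for an HVM guest with an auto-translated
 * physmap.
 */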
static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                                      int cmd, uint64_t arg)
{
    int err = 0;

    switch (cmd) {
    case XENVER_get_features: {
        struct xen_feature_info fi;

        /* No need for 32/64 compat handling */
        qemu_build_assert(sizeof(fi) == 8);

        err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
        if (err) {
            break;
        }

        fi.submap = 0;
        if (fi.submap_idx == 0) {
            fi.submap |= 1 << XENFEAT_writable_page_tables |
                         1 << XENFEAT_writable_descriptor_tables |
                         1 << XENFEAT_auto_translated_physmap |
                         1 << XENFEAT_supervisor_mode_kernel;
        }

        err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
        break;
    }

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
{
    struct kvm_xen_vcpu_attr xhsi;

    xhsi.type = type;
    xhsi.u.gpa = gpa;

    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
}

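/*
 * The env fields mirrored by these attributes belong to the vCPU thread,
 * and the attribute is set with a vCPU ioctl, so the setters below are
 * scheduled on the target vCPU via async_run_on_cpu() rather than being
 * called directly from the hypercall handler.
 */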
static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_default_gpa = data.host_ulong;

    /* Changing the default does nothing if a vcpu_info was explicitly set. */
    if (env->xen_vcpu_info_gpa == INVALID_GPA) {
        kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
                              env->xen_vcpu_info_default_gpa);
    }
}

static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
                          env->xen_vcpu_info_gpa);
}

static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_time_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          env->xen_vcpu_time_info_gpa);
}

static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          INVALID_GPA);
}

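/*
 * The shared_info page contains an embedded array of vcpu_info structures
 * for the first XEN_LEGACY_MAX_VCPUS vCPUs, so mapping it also establishes
 * the default vcpu_info location for each of those vCPUs.
 */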
static int xen_set_shared_info(uint64_t gfn)
{
    uint64_t gpa = gfn << TARGET_PAGE_BITS;
    int i, err;

    QEMU_IOTHREAD_LOCK_GUARD();

    /*
     * The xen_overlay device tells KVM about it too, since it had to
     * do that on migration load anyway (unless we're going to jump
     * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific).
     */
    err = xen_overlay_map_shinfo_page(gpa);
    if (err) {
        return err;
    }

    trace_kvm_xen_set_shared_info(gfn);

    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
        CPUState *cpu = qemu_get_cpu(i);
        if (cpu) {
            async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
                             RUN_ON_CPU_HOST_ULONG(gpa));
        }
        gpa += sizeof(vcpu_info_t);
    }

    return err;
}

static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
{
    switch (space) {
    case XENMAPSPACE_shared_info:
        if (idx > 0) {
            return -EINVAL;
        }
        return xen_set_shared_info(gfn);

    case XENMAPSPACE_grant_table:
    case XENMAPSPACE_gmfn:
    case XENMAPSPACE_gmfn_range:
        return -ENOTSUP;

    case XENMAPSPACE_gmfn_foreign:
    case XENMAPSPACE_dev_mmio:
        return -EPERM;

    default:
        return -EINVAL;
    }
}

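/*
 * The 32-bit ABI lays out xen_add_to_physmap differently (packed to 16
 * bytes), so when the guest is not in long mode a compat copy of the
 * structure is fetched and widened field by field.
 */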
static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    struct xen_add_to_physmap xatp;
    CPUState *cs = CPU(cpu);

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap xatp32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
        if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
            return -EFAULT;
        }
        xatp.domid = xatp32.domid;
        xatp.size = xatp32.size;
        xatp.space = xatp32.space;
        xatp.idx = xatp32.idx;
        xatp.gpfn = xatp32.gpfn;
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
            return -EFAULT;
        }
    }

    if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
        return -ESRCH;
    }

    return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
}

static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   uint64_t arg)
{
    struct xen_add_to_physmap_batch xatpb;
    unsigned long idxs_gva, gpfns_gva, errs_gva;
    CPUState *cs = CPU(cpu);
    size_t op_sz;

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap_batch xatpb32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
            return -EFAULT;
        }
        xatpb.domid = xatpb32.domid;
        xatpb.space = xatpb32.space;
        xatpb.size = xatpb32.size;

        idxs_gva = xatpb32.idxs.c;
        gpfns_gva = xatpb32.gpfns.c;
        errs_gva = xatpb32.errs.c;
        op_sz = sizeof(uint32_t);
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
            return -EFAULT;
        }
        op_sz = sizeof(unsigned long);
        idxs_gva = (unsigned long)xatpb.idxs.p;
        gpfns_gva = (unsigned long)xatpb.gpfns.p;
        errs_gva = (unsigned long)xatpb.errs.p;
    }

    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
        return -ESRCH;
    }

    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
    if (xatpb.space == XENMAPSPACE_gmfn_range) {
        return -EINVAL;
    }

    while (xatpb.size--) {
        unsigned long idx = 0;
        unsigned long gpfn = 0;
        int err;

        /* For 32-bit compat this only copies the low 32 bits of each */
        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
            return -EFAULT;
        }
        idxs_gva += op_sz;
        gpfns_gva += op_sz;

        err = add_to_physmap_one(xatpb.space, idx, gpfn);

        /* Report the per-entry result back to the guest's errs array */
        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
            return -EFAULT;
        }
        errs_gva += sizeof(err);
    }

    return 0;
}

static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    int err;

    switch (cmd) {
    case XENMEM_add_to_physmap:
        err = do_add_to_physmap(exit, cpu, arg);
        break;

    case XENMEM_add_to_physmap_batch:
        err = do_add_to_physmap_batch(exit, cpu, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                 int cmd, uint64_t arg)
{
    switch (cmd) {
    case HVMOP_pagetable_dying:
        exit->u.hcall.result = -ENOSYS;
        return true;

    default:
        return false;
    }
}

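/*
 * VCPUOP_register_vcpu_info lets the guest move a vCPU's vcpu_info away
 * from its default slot in the shared_info page, which is necessary for
 * vCPUs beyond the legacy limit of 32. An explicit registration takes
 * precedence over the default GPA derived from shared_info.
 */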
static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_register_vcpu_info rvi;
    uint64_t gpa;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rvi) == 16);
    qemu_build_assert(sizeof(struct vcpu_info) == 64);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
        return -EFAULT;
    }

    /* The vcpu_info must not straddle a page boundary */
    if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
        return -EINVAL;
    }

    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
                                          uint64_t arg)
{
    struct vcpu_register_time_memory_area tma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(tma) == 8);
    qemu_build_assert(sizeof(struct vcpu_time_info) == 32);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
        return -EFAULT;
    }

    /*
     * Xen actually uses the GVA and does the translation through the guest
     * page tables each time. But Linux/KVM uses the GPA, on the assumption
     * that guests only ever use *global* addresses (kernel virtual addresses)
     * for it. If Linux is changed to redo the GVA→GPA translation each time,
     * it will offer a new vCPU attribute for that, and we'll use it instead.
     */
    if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
        len < sizeof(struct vcpu_time_info)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                  int cmd, int vcpu_id, uint64_t arg)
{
    CPUState *dest = qemu_get_cpu(vcpu_id);
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case VCPUOP_register_vcpu_time_memory_area:
        err = vcpuop_register_vcpu_time_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_info:
        err = vcpuop_register_vcpu_info(cs, dest, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

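/*
 * A Xen soft reset (used for kexec/kdump) tears down all Xen state while
 * leaving the VM running: every vCPU drops its vcpu_info and time-info
 * registrations, and the shared_info overlay page is unmapped.
 */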
int kvm_xen_soft_reset(void)
{
    CPUState *cpu;
    int err;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
    }

    err = xen_overlay_map_shinfo_page(INVALID_GFN);
    if (err) {
        return err;
    }

    return 0;
}

static int schedop_shutdown(CPUState *cs, uint64_t arg)
{
    struct sched_shutdown shutdown;
    int ret = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(shutdown) == 4);

    if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
        return -EFAULT;
    }

    switch (shutdown.reason) {
    case SHUTDOWN_crash:
        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
        qemu_system_guest_panicked(NULL);
        break;

    case SHUTDOWN_reboot:
        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        break;

    case SHUTDOWN_poweroff:
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;

    case SHUTDOWN_soft_reset:
        qemu_mutex_lock_iothread();
        ret = kvm_xen_soft_reset();
        qemu_mutex_unlock_iothread();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case SCHEDOP_shutdown:
        err = schedop_shutdown(cs, arg);
        break;

    case SCHEDOP_poll:
        /*
         * Linux will panic if this doesn't work. Just yield; it's not
         * worth overthinking it because with event channel handling
         * in KVM, the kernel will intercept this and it will never
         * reach QEMU anyway. The semantics of the hypercall explicitly
         * permit spurious wakeups.
         */
    case SCHEDOP_yield:
        sched_yield();
        err = 0;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

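/*
 * Dispatch an intercepted hypercall: the hypercall number arrives in
 * exit->u.hcall.input and its arguments in exit->u.hcall.params[]. Each
 * handler returns true if it consumed the call (even if only to fail it
 * with an error in the result field); false means "unrecognised", which
 * the caller logs and answers with -ENOSYS.
 */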
static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    uint16_t code = exit->u.hcall.input;

    /* Hypercalls are privileged; reject anything from guest userspace */
    if (exit->u.hcall.cpl > 0) {
        exit->u.hcall.result = -EPERM;
        return true;
    }

    switch (code) {
    case __HYPERVISOR_sched_op:
        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
                                      exit->u.hcall.params[1]);
    case __HYPERVISOR_vcpu_op:
        return kvm_xen_hcall_vcpu_op(exit, cpu,
                                     exit->u.hcall.params[0],
                                     exit->u.hcall.params[1],
                                     exit->u.hcall.params[2]);
    case __HYPERVISOR_hvm_op:
        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
                                    exit->u.hcall.params[1]);
    case __HYPERVISOR_memory_op:
        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_xen_version:
        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                         exit->u.hcall.params[1]);
    default:
        return false;
    }
}

int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    if (exit->type != KVM_EXIT_XEN_HCALL) {
        return -1;
    }

    /*
     * The kernel latches the guest 32/64 mode when the MSR is used to fill
     * the hypercall page. So if we see a hypercall in a mode that doesn't
     * match our own idea of the guest mode, fetch the kernel's idea of the
     * "long mode" to remain in sync.
     */
    if (exit->u.hcall.longmode != xen_is_long_mode()) {
        xen_sync_long_mode();
    }

    if (!do_kvm_xen_handle_exit(cpu, exit)) {
        /*
         * Some hypercalls will be deliberately "implemented" by returning
         * -ENOSYS. This case is for hypercalls which are unexpected.
         */
        exit->u.hcall.result = -ENOSYS;
        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
                      PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
                      (uint64_t)exit->u.hcall.input,
                      (uint64_t)exit->u.hcall.params[0],
                      (uint64_t)exit->u.hcall.params[1],
                      (uint64_t)exit->u.hcall.params[2]);
    }

    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
                            exit->u.hcall.input, exit->u.hcall.params[0],
                            exit->u.hcall.params[1], exit->u.hcall.params[2],
                            exit->u.hcall.result);
    return 0;
}

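/*
 * Push the QEMU-side Xen vCPU state back into KVM. This runs when vCPU
 * state is (re)loaded, e.g. after migration, so the kernel regains the
 * vcpu_info and time-info GPAs it needs for event and time delivery.
 */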
int kvm_put_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }

    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_time_info_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

int kvm_get_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;

    /*
     * The kernel does not mark vcpu_info as dirty when it delivers interrupts
     * to it. It's up to userspace to *assume* that any page shared thus is
     * always considered dirty. The shared_info page is different since it's
     * an overlay and migrated separately anyway.
     */
    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }
    if (gpa != INVALID_GPA) {
        MemoryRegionSection mrs = memory_region_find(get_system_memory(),
                                                     gpa,
                                                     sizeof(struct vcpu_info));
        if (mrs.mr &&
            !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
            memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
                                    sizeof(struct vcpu_info));
        }
    }

    return 0;
}