/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm_int.h"
#include "sysemu/kvm_xen.h"
#include "kvm/kvm_i386.h"
#include "exec/address-spaces.h"
#include "xen-emu.h"
#include "trace.h"
#include "sysemu/runstate.h"

#include "hw/i386/kvm/xen_overlay.h"

#include "hw/xen/interface/version.h"
#include "hw/xen/interface/sched.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/hvm_op.h"
#include "hw/xen/interface/vcpu.h"

#include "xen-compat.h"

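/*
 * On a 64-bit build, a guest making hypercalls while not in long mode uses
 * the 32-bit "compat" argument layout; a 32-bit build has no separate
 * compat ABI to translate from, so the predicate is always false there.
 */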
#ifdef TARGET_X86_64
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif

static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
                           size_t *len, bool is_write)
{
    struct kvm_translation tr = {
        .linear_address = gva,
    };

    if (len) {
        *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
    }

    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
        (is_write && !tr.writeable)) {
        return false;
    }
    *gpa = tr.physical_address;
    return true;
}

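/*
 * Copy between guest virtual address space and a host buffer, translating
 * one page at a time so that ranges crossing page boundaries are handled.
 */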
static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
                      bool is_write)
{
    uint8_t *buf = (uint8_t *)_buf;
    uint64_t gpa;
    size_t len;

    while (sz) {
        if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
            return -EFAULT;
        }
        if (len > sz) {
            len = sz;
        }

        cpu_physical_memory_rw(gpa, buf, len, is_write);

        buf += len;
        sz -= len;
        gva += len;
    }

    return 0;
}

static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}

static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}

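/*
 * Probe and enable KVM's Xen HVM support: hypercall interception via the
 * given MSR, plus event channel delivery if the kernel offers it.
 */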
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    s->xen_caps = xen_caps;
    return 0;
}

int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;

    return 0;
}

uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}

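/*
 * Hypercall handlers return true if the call was recognised (even when the
 * outcome is an error code in exit->u.hcall.result), and false to have the
 * caller report it as an unimplemented hypercall.
 */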
static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                                      int cmd, uint64_t arg)
{
    int err = 0;

    switch (cmd) {
    case XENVER_get_features: {
        struct xen_feature_info fi;

        /* No need for 32/64 compat handling */
        qemu_build_assert(sizeof(fi) == 8);

        err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
        if (err) {
            break;
        }

        fi.submap = 0;
        if (fi.submap_idx == 0) {
            fi.submap |= 1 << XENFEAT_writable_page_tables |
                         1 << XENFEAT_writable_descriptor_tables |
                         1 << XENFEAT_auto_translated_physmap |
                         1 << XENFEAT_supervisor_mode_kernel;
        }

        err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
        break;
    }

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
{
    struct kvm_xen_vcpu_attr xhsi;

    xhsi.type = type;
    xhsi.u.gpa = gpa;

    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
}

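/*
 * The do_set_* helpers below are scheduled with async_run_on_cpu() so that
 * the kvm_vcpu_ioctl() behind kvm_xen_set_vcpu_attr() is issued from the
 * target vCPU's own thread.
 */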
static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_default_gpa = data.host_ulong;

    /* Changing the default does nothing if a vcpu_info was explicitly set. */
    if (env->xen_vcpu_info_gpa == INVALID_GPA) {
        kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
                              env->xen_vcpu_info_default_gpa);
    }
}

static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
                          env->xen_vcpu_info_gpa);
}

static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_time_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          env->xen_vcpu_time_info_gpa);
}

static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_runstate_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          env->xen_vcpu_runstate_gpa);
}

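/*
 * On soft reset, forget all registered areas; passing INVALID_GPA tells
 * the kernel to drop each attribute.
 */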
static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          INVALID_GPA);
}

static int xen_set_shared_info(uint64_t gfn)
{
    uint64_t gpa = gfn << TARGET_PAGE_BITS;
    int i, err;

    QEMU_IOTHREAD_LOCK_GUARD();

    /*
     * The xen_overlay device tells KVM about it too, since it had to
     * do that on migration load anyway (unless we're going to jump
     * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific).
     */
    err = xen_overlay_map_shinfo_page(gpa);
    if (err) {
        return err;
    }

    trace_kvm_xen_set_shared_info(gfn);

    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
        CPUState *cpu = qemu_get_cpu(i);
        if (cpu) {
            async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
                             RUN_ON_CPU_HOST_ULONG(gpa));
        }
        gpa += sizeof(vcpu_info_t);
    }

    return 0;
}

static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
{
    switch (space) {
    case XENMAPSPACE_shared_info:
        if (idx > 0) {
            return -EINVAL;
        }
        return xen_set_shared_info(gfn);

    case XENMAPSPACE_grant_table:
    case XENMAPSPACE_gmfn:
    case XENMAPSPACE_gmfn_range:
        return -ENOTSUP;

    case XENMAPSPACE_gmfn_foreign:
    case XENMAPSPACE_dev_mmio:
        return -EPERM;

    default:
        return -EINVAL;
    }
}

static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    struct xen_add_to_physmap xatp;
    CPUState *cs = CPU(cpu);

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap xatp32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
        if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
            return -EFAULT;
        }
        xatp.domid = xatp32.domid;
        xatp.size = xatp32.size;
        xatp.space = xatp32.space;
        xatp.idx = xatp32.idx;
        xatp.gpfn = xatp32.gpfn;
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
            return -EFAULT;
        }
    }

    if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
        return -ESRCH;
    }

    return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
}

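/*
 * The batch variant reads the idx/gpfn/error arrays through guest virtual
 * addresses; under 32-bit compat those are 32-bit pointers to 32-bit
 * elements.
 */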
static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   uint64_t arg)
{
    struct xen_add_to_physmap_batch xatpb;
    unsigned long idxs_gva, gpfns_gva, errs_gva;
    CPUState *cs = CPU(cpu);
    size_t op_sz;

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap_batch xatpb32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
            return -EFAULT;
        }
        xatpb.domid = xatpb32.domid;
        xatpb.space = xatpb32.space;
        xatpb.size = xatpb32.size;

        idxs_gva = xatpb32.idxs.c;
        gpfns_gva = xatpb32.gpfns.c;
        errs_gva = xatpb32.errs.c;
        op_sz = sizeof(uint32_t);
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
            return -EFAULT;
        }
        op_sz = sizeof(unsigned long);
        idxs_gva = (unsigned long)xatpb.idxs.p;
        gpfns_gva = (unsigned long)xatpb.gpfns.p;
        errs_gva = (unsigned long)xatpb.errs.p;
    }

    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
        return -ESRCH;
    }

    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
    if (xatpb.space == XENMAPSPACE_gmfn_range) {
        return -EINVAL;
    }

    while (xatpb.size--) {
        unsigned long idx = 0;
        unsigned long gpfn = 0;
        int err;

        /* For 32-bit compat this only copies the low 32 bits of each */
        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
            return -EFAULT;
        }
        idxs_gva += op_sz;
        gpfns_gva += op_sz;

        err = add_to_physmap_one(xatpb.space, idx, gpfn);

        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
            return -EFAULT;
        }
        errs_gva += sizeof(err);
    }
    return 0;
}

static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    int err;

    switch (cmd) {
    case XENMEM_add_to_physmap:
        err = do_add_to_physmap(exit, cpu, arg);
        break;

    case XENMEM_add_to_physmap_batch:
        err = do_add_to_physmap_batch(exit, cpu, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                 int cmd, uint64_t arg)
{
    switch (cmd) {
    case HVMOP_pagetable_dying:
        exit->u.hcall.result = -ENOSYS;
        return true;

    default:
        return false;
    }
}

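/*
 * VCPUOP registration handlers: @cs is the vCPU making the hypercall, and
 * @target is the vCPU whose area is being registered.
 */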
static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_register_vcpu_info rvi;
    uint64_t gpa;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rvi) == 16);
    qemu_build_assert(sizeof(struct vcpu_info) == 64);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
        return -EFAULT;
    }

    if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
        return -EINVAL;
    }

    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
                                          uint64_t arg)
{
    struct vcpu_register_time_memory_area tma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(tma) == 8);
    qemu_build_assert(sizeof(struct vcpu_time_info) == 32);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
        return -EFAULT;
    }

    /*
     * Xen actually uses the GVA and does the translation through the guest
     * page tables each time. But Linux/KVM uses the GPA, on the assumption
     * that guests only ever use *global* addresses (kernel virtual addresses)
     * for it. If Linux is changed to redo the GVA→GPA translation each time,
     * it will offer a new vCPU attribute for that, and we'll use it instead.
     */
    if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
        len < sizeof(struct vcpu_time_info)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
                                         uint64_t arg)
{
    struct vcpu_register_runstate_memory_area rma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rma) == 8);
    /* The runstate area actually does change size, but Linux copes. */

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
        return -EFAULT;
    }

    /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
    if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                  int cmd, int vcpu_id, uint64_t arg)
{
    CPUState *dest = qemu_get_cpu(vcpu_id);
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case VCPUOP_register_runstate_memory_area:
        err = vcpuop_register_runstate_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_time_memory_area:
        err = vcpuop_register_vcpu_time_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_info:
        err = vcpuop_register_vcpu_info(cs, dest, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

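/*
 * Soft reset must run with the iothread lock held; it unmaps the shared
 * info page and clears every vCPU's registered areas.
 */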
int kvm_xen_soft_reset(void)
{
    CPUState *cpu;
    int err;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
    }

    err = xen_overlay_map_shinfo_page(INVALID_GFN);
    if (err) {
        return err;
    }

    return 0;
}

static int schedop_shutdown(CPUState *cs, uint64_t arg)
{
    struct sched_shutdown shutdown;
    int ret = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(shutdown) == 4);

    if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
        return -EFAULT;
    }

    switch (shutdown.reason) {
    case SHUTDOWN_crash:
        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
        qemu_system_guest_panicked(NULL);
        break;

    case SHUTDOWN_reboot:
        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        break;

    case SHUTDOWN_poweroff:
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;

    case SHUTDOWN_soft_reset:
        qemu_mutex_lock_iothread();
        ret = kvm_xen_soft_reset();
        qemu_mutex_unlock_iothread();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case SCHEDOP_shutdown:
        err = schedop_shutdown(cs, arg);
        break;

    case SCHEDOP_poll:
        /*
         * Linux will panic if this doesn't work. Just yield; it's not
         * worth overthinking it because with event channel handling
         * in KVM, the kernel will intercept this and it will never
         * reach QEMU anyway. The semantics of the hypercall explicitly
         * permit spurious wakeups.
         */
    case SCHEDOP_yield:
        sched_yield();
        err = 0;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    uint16_t code = exit->u.hcall.input;

    if (exit->u.hcall.cpl > 0) {
        exit->u.hcall.result = -EPERM;
        return true;
    }

    switch (code) {
    case __HYPERVISOR_sched_op:
        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
                                      exit->u.hcall.params[1]);
    case __HYPERVISOR_vcpu_op:
        return kvm_xen_hcall_vcpu_op(exit, cpu,
                                     exit->u.hcall.params[0],
                                     exit->u.hcall.params[1],
                                     exit->u.hcall.params[2]);
    case __HYPERVISOR_hvm_op:
        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
                                    exit->u.hcall.params[1]);
    case __HYPERVISOR_memory_op:
        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_xen_version:
        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                         exit->u.hcall.params[1]);
    default:
        return false;
    }
}

int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    if (exit->type != KVM_EXIT_XEN_HCALL) {
        return -1;
    }

    /*
     * The kernel latches the guest 32/64 mode when the MSR is used to fill
     * the hypercall page. So if we see a hypercall in a mode that doesn't
     * match our own idea of the guest mode, fetch the kernel's idea of the
     * "long mode" to remain in sync.
     */
    if (exit->u.hcall.longmode != xen_is_long_mode()) {
        xen_sync_long_mode();
    }

    if (!do_kvm_xen_handle_exit(cpu, exit)) {
        /*
         * Some hypercalls will be deliberately "implemented" by returning
         * -ENOSYS. This case is for hypercalls which are unexpected.
         */
        exit->u.hcall.result = -ENOSYS;
        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
                      PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
                      (uint64_t)exit->u.hcall.input,
                      (uint64_t)exit->u.hcall.params[0],
                      (uint64_t)exit->u.hcall.params[1],
                      (uint64_t)exit->u.hcall.params[2]);
    }

    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
                            exit->u.hcall.input, exit->u.hcall.params[0],
                            exit->u.hcall.params[1], exit->u.hcall.params[2],
                            exit->u.hcall.result);
    return 0;
}

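/*
 * kvm_put_xen_state() / kvm_get_xen_state() sync the KVM-side Xen vCPU
 * state with CPUX86State, presumably around vmstate save/load, so that
 * the registered GPAs survive migration.
 */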
int kvm_put_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }

    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_time_info_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_runstate_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

int kvm_get_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;

    /*
     * The kernel does not mark vcpu_info as dirty when it delivers interrupts
     * to it. It's up to userspace to *assume* that any page shared thus is
     * always considered dirty. The shared_info page is different since it's
     * an overlay and migrated separately anyway.
     */
    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }
    if (gpa != INVALID_GPA) {
        MemoryRegionSection mrs = memory_region_find(get_system_memory(),
                                                     gpa,
                                                     sizeof(struct vcpu_info));
        if (mrs.mr &&
            !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
            memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
                                    sizeof(struct vcpu_info));