// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
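/*
 * Example: { "exit_null", VCPU_STAT(exit_null) } expands to
 * { "exit_null", offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU },
 * so the generic KVM debugfs code can read each counter directly out of the
 * vcpu (or, with VM_STAT, the kvm) structure.
 */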
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	int rc;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		rc = -ENOMEM;
		goto out_debug_unreg;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("Failed to register FLIC rc=%d\n", rc);
		goto out_debug_unreg;
	}
	return 0;

out_debug_unreg:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
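/*
 * Illustrative userspace sketch (vm_fd is a hypothetical VM file
 * descriptor): the capabilities reported above are queried with the
 * generic KVM_CHECK_EXTENSION ioctl, e.g.
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// r > 0: supported; here the maximum transfer size (MEM_OP_MAX_SIZE)
 *	// r == 0: not supported
 */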
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = 0;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
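/*
 * Illustrative userspace sketch (vm_fd is a hypothetical VM file
 * descriptor): migration mode is toggled through KVM_SET_DEVICE_ATTR
 * with the group/attribute constants handled above, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * The matching KVM_S390_VM_MIGRATION_STATUS query is served by
 * kvm_s390_vm_get_migration() above.
 */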
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
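/*
 * Worked example: with htod.tod = 0xfffffffffffffffe and
 * kvm->arch.epoch = 3, the 64-bit addition wraps around to 1, so
 * gtod->tod < htod.tod holds and the carry is propagated into the
 * epoch index, the high word of the 128-bit extended TOD.
 */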
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
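/*
 * With 8-byte pointers KVM_S390_MAX_BIT_DISTANCE evaluates to 16: a run of
 * up to 16 clean values is still sent inline, because starting a new block
 * would cost a fresh base address and length, i.e. two longs, anyway.
 */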
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
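/*
 * Note: this binary search assumes the memslot array is kept sorted by
 * descending base_gfn (as the generic search_memslots() does), which is why
 * "end" moves down to "slot" whenever gfn >= the probed base_gfn.
 */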
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
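/*
 * The byte saved per page above is (pgstev >> 24) & 0x43, i.e. the PGSTE
 * usage-state bits plus the NODAT bit, matching the mask
 * (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT) that kvm_s390_set_cmma_bits()
 * below accepts when the values are restored on the destination.
 */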
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}

/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Can not happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);
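	/*
	 * The basic SCA only needs 64-byte alignment, so consecutive VMs
	 * place their SCA at different 16-byte offsets within a page
	 * (sca_offset is static and wraps at PAGE_SIZE), rather than all
	 * starting at the same offset.
	 */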
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
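/*
 * The SCA (system control area) is shared by all vCPUs of a VM: each
 * entry carries the SIE block address (sda) and SIGP control state of
 * one vCPU, so SIE can interpret inter-CPU signalling without exiting
 * to the host. The helpers below keep those entries in sync with vCPU
 * creation and destruction (description based on how the fields are
 * used here).
 */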
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
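/*
 * Switching from the basic SCA to the larger extended SCA must appear
 * atomic to running vCPUs: all vCPUs are blocked and kicked out of SIE,
 * the entries are copied under the sca_lock write lock, and every SIE
 * block is repointed to the new SCA before anyone re-enters SIE.
 */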
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
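/*
 * CPU timer accounting: the vCPU thread is the only writer of
 * cputm_start/cputm_enabled (always with preemption disabled), while
 * other threads may read the timer concurrently. The seqcount used
 * below lets readers retry instead of taking a lock on this hot path.
 */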
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
					CR0_INTERRUPT_KEY_SUBMASK |
					CR0_MEASUREMENT_ALERT_SUBMASK;
	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
					CR14_UNUSED_33 |
					CR14_EXTERNAL_DAMAGE_SUBMASK;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	/*
	 * If the AP instructions are not being interpreted and the MSAX3
	 * facility is not configured for the guest, there is nothing to set up.
	 */
	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
	vcpu->arch.sie_block->eca &= ~ECA_APIE;

	if (vcpu->kvm->arch.crypto.apie)
		vcpu->arch.sie_block->eca |= ECA_APIE;

	/* Set up protected key support */
	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
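/*
 * Note: the SIE control block and the interception TDB share a single
 * page (struct sie_page); the BUILD_BUG_ON below guards that layout,
 * and itdba is simply pointed into the same page.
 */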
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
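/*
 * Called whenever the host mapping backing a guest prefix page changes.
 * The prefix (lowcore) pages must stay mapped read/write while a vCPU
 * runs, so every affected vCPU gets an MMU reload request, which
 * re-arms the protection notification before the next SIE entry.
 */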
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
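/*
 * The guest TOD is kept as an offset: epoch = guest TOD - host TOD,
 * with epdx carrying the multiple-epoch extension. All vCPUs are
 * blocked while the new epoch pair is installed so they observe a
 * consistent clock.
 */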
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
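/*
 * Note on kvm_arch_setup_async_pf() above: the token is only delivered
 * when the guest has armed pfault handling (the DIAG 0x258 handshake)
 * and is in a state where it can actually take the external interrupt;
 * that is what the long list of early exits checks.
 */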
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
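/*
 * sync_regs()/store_regs() implement the lazy register convention:
 * host access/fp/gs state is stashed and the guest's state is made
 * the current one in current->thread, so the regular context-switch
 * code carries it while the vCPU runs; store_regs() reverses the swap
 * on the way back to userspace.
 */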
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
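/*
 * IBS can only be exploited while at most one vCPU is in the started
 * state: starting a second vCPU revokes it everywhere, and stopping the
 * next-to-last vCPU hands it to the survivor (see the start/stop logic
 * below).
 */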
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
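/*
 * sclp.hmfai appears to pack, in 2-bit fields per facility-list
 * doubleword, how many 16-bit chunks of that doubleword are
 * hypervisor-managed; the mask below then passes through only the
 * non-hypervisor-managed facility bits (interpretation inferred from
 * the shifts used here).
 */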
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");