// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
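/*
 * Example: { "exit_null", VCPU_STAT(exit_null) } expands to
 * { "exit_null", offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU },
 * so the generic KVM debugfs code can read each counter directly out of the
 * vcpu (or, with VM_STAT, the kvm) structure.
 */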
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	int rc;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		rc = -ENOMEM;
		goto out_debug_unreg;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("Failed to register FLIC rc=%d\n", rc);
		goto out_debug_unreg;
	}
	return 0;

out_debug_unreg:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
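/*
 * Illustrative userspace sketch (vm_fd is a hypothetical VM file
 * descriptor): the capabilities reported above are queried with the
 * generic KVM_CHECK_EXTENSION ioctl, e.g.
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// r > 0: supported; here the maximum transfer size (MEM_OP_MAX_SIZE)
 *	// r == 0: not supported
 */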
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = 0;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
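/*
 * Illustrative userspace sketch (vm_fd is a hypothetical VM file
 * descriptor): migration mode is toggled through KVM_SET_DEVICE_ATTR
 * with the group/attribute constants handled above, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * The matching KVM_S390_VM_MIGRATION_STATUS query is served by
 * kvm_s390_vm_get_migration() above.
 */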
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
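/*
 * Worked example: with htod.tod = 0xfffffffffffffffe and
 * kvm->arch.epoch = 3, the 64-bit addition wraps around to 1, so
 * gtod->tod < htod.tod holds and the carry is propagated into the
 * epoch index, the high word of the 128-bit extended TOD.
 */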
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
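/*
 * With 8-byte pointers KVM_S390_MAX_BIT_DISTANCE evaluates to 16: a run of
 * up to 16 clean values is still sent inline, because starting a new block
 * would cost a fresh base address and length, i.e. two longs, anyway.
 */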
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
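/*
 * Note: this binary search assumes the memslot array is kept sorted by
 * descending base_gfn (as the generic search_memslots() does), which is why
 * "end" moves down to "slot" whenever gfn >= the probed base_gfn.
 */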
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
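/*
 * The byte saved per page above is (pgstev >> 24) & 0x43, i.e. the PGSTE
 * usage-state bits plus the NODAT bit, matching the mask
 * (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT) that kvm_s390_set_cmma_bits()
 * below accepts when the values are restored on the destination.
 */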
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}

/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Can not happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);
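	/*
	 * The basic SCA only needs 64-byte alignment, so consecutive VMs
	 * place their SCA at different 16-byte offsets within a page
	 * (sca_offset is static and wraps at PAGE_SIZE), rather than all
	 * starting at the same offset.
	 */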
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
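/*
 * The SCA (system control area) is shared by all vCPUs of a VM: each
 * entry carries the SIE block address (sda) and SIGP control state of
 * one vCPU, so SIE can interpret inter-CPU signalling without exiting
 * to the host. The helpers below keep those entries in sync with vCPU
 * creation and destruction (description based on how the fields are
 * used here).
 */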
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
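/*
 * Switching from the basic SCA to the larger extended SCA must appear
 * atomic to running vCPUs: all vCPUs are blocked and kicked out of SIE,
 * the entries are copied under the sca_lock write lock, and every SIE
 * block is repointed to the new SCA before anyone re-enters SIE.
 */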
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
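/*
 * CPU timer accounting: the vCPU thread is the only writer of
 * cputm_start/cputm_enabled (always with preemption disabled), while
 * other threads may read the timer concurrently. The seqcount used
 * below lets readers retry instead of taking a lock on this hot path.
 */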
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
					CR0_INTERRUPT_KEY_SUBMASK |
					CR0_MEASUREMENT_ALERT_SUBMASK;
	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
					CR14_UNUSED_33 |
					CR14_EXTERNAL_DAMAGE_SUBMASK;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	/*
	 * If the AP instructions are not being interpreted and the MSAX3
	 * facility is not configured for the guest, there is nothing to set up.
	 */
	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
	vcpu->arch.sie_block->eca &= ~ECA_APIE;

	if (vcpu->kvm->arch.crypto.apie)
		vcpu->arch.sie_block->eca |= ECA_APIE;

	/* Set up protected key support */
	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
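/*
 * Note: the SIE control block and the interception TDB share a single
 * page (struct sie_page); the BUILD_BUG_ON below guards that layout,
 * and itdba is simply pointed into the same page.
 */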
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
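/*
 * Called whenever the host mapping backing a guest prefix page changes.
 * The prefix (lowcore) pages must stay mapped read/write while a vCPU
 * runs, so every affected vCPU gets an MMU reload request, which
 * re-arms the protection notification before the next SIE entry.
 */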
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
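/*
 * The guest TOD is kept as an offset: epoch = guest TOD - host TOD,
 * with epdx carrying the multiple-epoch extension. All vCPUs are
 * blocked while the new epoch pair is installed so they observe a
 * consistent clock.
 */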
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
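/*
 * Note on kvm_arch_setup_async_pf() above: the token is only delivered
 * when the guest has armed pfault handling (the DIAG 0x258 handshake)
 * and is in a state where it can actually take the external interrupt;
 * that is what the long list of early exits checks.
 */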
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
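/*
 * sync_regs()/store_regs() implement the lazy register convention:
 * host access/fp/gs state is stashed and the guest's state is made
 * the current one in current->thread, so the regular context-switch
 * code carries it while the vCPU runs; store_regs() reverses the swap
 * on the way back to userspace.
 */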
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
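/*
 * IBS can only be exploited while at most one vCPU is in the started
 * state: starting a second vCPU revokes it everywhere, and stopping the
 * next-to-last vCPU hands it to the survivor (see the start/stop logic
 * below).
 */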
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
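/*
 * sclp.hmfai appears to pack, in 2-bit fields per facility-list
 * doubleword, how many 16-bit chunks of that doubleword are
 * hypervisor-managed; the mask below then passes through only the
 * non-hypervisor-managed facility bits (interpretation inferred from
 * the shifts used here).
 */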
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");