OSDN Git Service

KVM: s390: Do not leak kernel stack data in the KVM_S390_INTERRUPT ioctl
[android-x86/kernel.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/stp.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of VCPU-local interrupt slots -- confirm */
#define LOCAL_IRQS 32
/* Byte size of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two initializer values: the offset of stat.x inside
 * struct kvm_vcpu, followed by the KVM_STAT_VCPU tag.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
/*
 * Table of per-VCPU statistics: each entry pairs a name with the
 * counter's location as produced by VCPU_STAT(). The list is terminated
 * by a { NULL } entry.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};
128
/*
 * allow nested virtualization in KVM (if enabled by user space)
 *
 * Read-only module parameter (S_IRUGO). When it is zero,
 * kvm_s390_cpu_feat_init() returns before advertising SIEF2 and the
 * features that follow it.
 */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
133
/*
 * upper facilities limit for kvm
 *
 * 16 unsigned longs; the leading elements are initialized from
 * FACILITIES_KVM and the remainder is zero-filled.
 */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137 unsigned long kvm_s390_fac_list_mask_size(void)
138 {
139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
141 }
142
/* available cpu features supported by kvm (bits set via allow_cpu_feat()) */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle; registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
151
152 /* Section: not file related */
/*
 * Arch hook for enabling virtualization support. Nothing needs to be
 * done here, so it always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}
158
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160                               unsigned long end);
161
162 /*
163  * This callback is executed during stop_machine(). All CPUs are therefore
164  * temporarily stopped. In order not to change guest behavior, we have to
165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166  * so a CPU won't be stopped while calculating with the epoch.
167  */
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169                           void *v)
170 {
171         struct kvm *kvm;
172         struct kvm_vcpu *vcpu;
173         int i;
174         unsigned long long *delta = v;
175
176         list_for_each_entry(kvm, &vm_list, vm_list) {
177                 kvm->arch.epoch -= *delta;
178                 kvm_for_each_vcpu(i, vcpu, kvm) {
179                         vcpu->arch.sie_block->epoch -= *delta;
180                         if (vcpu->arch.cputm_enabled)
181                                 vcpu->arch.cputm_start += *delta;
182                         if (vcpu->arch.vsie_block)
183                                 vcpu->arch.vsie_block->epoch -= *delta;
184                 }
185         }
186         return NOTIFY_OK;
187 }
188
/*
 * Notifier block that routes epoch delta events to kvm_clock_sync(); it is
 * added to s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
192
/*
 * Register the two gmap pte notifiers (regular guests and vsie) and hook
 * kvm_clock_notifier into the epoch delta notifier chain.
 * Always returns 0.
 */
int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}
203
/*
 * Counterpart of kvm_arch_hardware_setup(): unregister both gmap pte
 * notifiers and remove kvm_clock_notifier from the epoch delta chain.
 */
void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}
211
/*
 * Mark CPU feature @nr as available in kvm_s390_available_cpu_feat.
 * NOTE(review): set_bit_inv() presumably uses MSB-0 bit numbering -- confirm.
 */
static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
216
/*
 * Ask the PLO instruction whether function code @nr is available.
 *
 * Register 0 is loaded with @nr | 0x100 (the "test bit" form), PLO is
 * executed and the resulting condition code is extracted with ipm/srl.
 * Returns 1 when the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
        /* bound to general register 0, which the plo below reads */
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc = 3; /* subfunction not available */

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
232
233 static void kvm_s390_cpu_feat_init(void)
234 {
235         int i;
236
237         for (i = 0; i < 256; ++i) {
238                 if (plo_test_bit(i))
239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240         }
241
242         if (test_facility(28)) /* TOD-clock steering */
243                 ptff(kvm_s390_available_subfunc.ptff,
244                      sizeof(kvm_s390_available_subfunc.ptff),
245                      PTFF_QAF);
246
247         if (test_facility(17)) { /* MSA */
248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249                               kvm_s390_available_subfunc.kmac);
250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251                               kvm_s390_available_subfunc.kmc);
252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.km);
254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kimd);
256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.klmd);
258         }
259         if (test_facility(76)) /* MSA3 */
260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.pckmo);
262         if (test_facility(77)) { /* MSA4 */
263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.kmctr);
265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266                               kvm_s390_available_subfunc.kmf);
267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmo);
269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.pcc);
271         }
272         if (test_facility(57)) /* MSA5 */
273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.ppno);
275
276         if (MACHINE_HAS_ESOP)
277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278         /*
279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281          */
282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283             !test_facility(3) || !nested)
284                 return;
285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286         if (sclp.has_64bscao)
287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288         if (sclp.has_siif)
289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290         if (sclp.has_gpere)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292         if (sclp.has_gsls)
293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294         if (sclp.has_ib)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296         if (sclp.has_cei)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298         if (sclp.has_ibs)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300         /*
301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302          * all skey handling functions read/set the skey from the PGSTE
303          * instead of the real storage key.
304          *
305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
306          * pages being detected as preserved although they are resident.
307          *
308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310          *
311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314          *
315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316          * cannot easily shadow the SCA because of the ipte lock.
317          */
318 }
319
320 int kvm_arch_init(void *opaque)
321 {
322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323         if (!kvm_s390_dbf)
324                 return -ENOMEM;
325
326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327                 debug_unregister(kvm_s390_dbf);
328                 return -ENOMEM;
329         }
330
331         kvm_s390_cpu_feat_init();
332
333         /* Register floating interrupt controller interface. */
334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335 }
336
/* Module teardown: drop the debug feature registered by kvm_arch_init(). */
void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}
341
342 /* Section: device related */
343 long kvm_arch_dev_ioctl(struct file *filp,
344                         unsigned int ioctl, unsigned long arg)
345 {
346         if (ioctl == KVM_S390_ENABLE_SIE)
347                 return s390_enable_sie();
348         return -EINVAL;
349 }
350
351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352 {
353         int r;
354
355         switch (ext) {
356         case KVM_CAP_S390_PSW:
357         case KVM_CAP_S390_GMAP:
358         case KVM_CAP_SYNC_MMU:
359 #ifdef CONFIG_KVM_S390_UCONTROL
360         case KVM_CAP_S390_UCONTROL:
361 #endif
362         case KVM_CAP_ASYNC_PF:
363         case KVM_CAP_SYNC_REGS:
364         case KVM_CAP_ONE_REG:
365         case KVM_CAP_ENABLE_CAP:
366         case KVM_CAP_S390_CSS_SUPPORT:
367         case KVM_CAP_IOEVENTFD:
368         case KVM_CAP_DEVICE_CTRL:
369         case KVM_CAP_ENABLE_CAP_VM:
370         case KVM_CAP_S390_IRQCHIP:
371         case KVM_CAP_VM_ATTRIBUTES:
372         case KVM_CAP_MP_STATE:
373         case KVM_CAP_S390_INJECT_IRQ:
374         case KVM_CAP_S390_USER_SIGP:
375         case KVM_CAP_S390_USER_STSI:
376         case KVM_CAP_S390_SKEYS:
377         case KVM_CAP_S390_IRQ_STATE:
378         case KVM_CAP_S390_USER_INSTR0:
379                 r = 1;
380                 break;
381         case KVM_CAP_S390_MEM_OP:
382                 r = MEM_OP_MAX_SIZE;
383                 break;
384         case KVM_CAP_NR_VCPUS:
385         case KVM_CAP_MAX_VCPUS:
386                 r = KVM_S390_BSCA_CPU_SLOTS;
387                 if (!kvm_s390_use_sca_entries())
388                         r = KVM_MAX_VCPUS;
389                 else if (sclp.has_esca && sclp.has_64bscao)
390                         r = KVM_S390_ESCA_CPU_SLOTS;
391                 break;
392         case KVM_CAP_NR_MEMSLOTS:
393                 r = KVM_USER_MEM_SLOTS;
394                 break;
395         case KVM_CAP_S390_COW:
396                 r = MACHINE_HAS_ESOP;
397                 break;
398         case KVM_CAP_S390_VECTOR_REGISTERS:
399                 r = MACHINE_HAS_VX;
400                 break;
401         case KVM_CAP_S390_RI:
402                 r = test_facility(64);
403                 break;
404         case KVM_CAP_S390_BPB:
405                 r = test_facility(82);
406                 break;
407         default:
408                 r = 0;
409         }
410         return r;
411 }
412
413 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
414                                         struct kvm_memory_slot *memslot)
415 {
416         gfn_t cur_gfn, last_gfn;
417         unsigned long address;
418         struct gmap *gmap = kvm->arch.gmap;
419
420         /* Loop over all guest pages */
421         last_gfn = memslot->base_gfn + memslot->npages;
422         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
423                 address = gfn_to_hva_memslot(memslot, cur_gfn);
424
425                 if (test_and_clear_guest_dirty(gmap->mm, address))
426                         mark_page_dirty(kvm, cur_gfn);
427                 if (fatal_signal_pending(current))
428                         return;
429                 cond_resched();
430         }
431 }
432
433 /* Section: vm related */
434 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
435
436 /*
437  * Get (and clear) the dirty memory log for a memory slot.
438  */
439 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
440                                struct kvm_dirty_log *log)
441 {
442         int r;
443         unsigned long n;
444         struct kvm_memslots *slots;
445         struct kvm_memory_slot *memslot;
446         int is_dirty = 0;
447
448         if (kvm_is_ucontrol(kvm))
449                 return -EINVAL;
450
451         mutex_lock(&kvm->slots_lock);
452
453         r = -EINVAL;
454         if (log->slot >= KVM_USER_MEM_SLOTS)
455                 goto out;
456
457         slots = kvm_memslots(kvm);
458         memslot = id_to_memslot(slots, log->slot);
459         r = -ENOENT;
460         if (!memslot->dirty_bitmap)
461                 goto out;
462
463         kvm_s390_sync_dirty_log(kvm, memslot);
464         r = kvm_get_dirty_log(kvm, log, &is_dirty);
465         if (r)
466                 goto out;
467
468         /* Clear the dirty log */
469         if (is_dirty) {
470                 n = kvm_dirty_bitmap_bytes(memslot);
471                 memset(memslot->dirty_bitmap, 0, n);
472         }
473         r = 0;
474 out:
475         mutex_unlock(&kvm->slots_lock);
476         return r;
477 }
478
479 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
480 {
481         unsigned int i;
482         struct kvm_vcpu *vcpu;
483
484         kvm_for_each_vcpu(i, vcpu, kvm) {
485                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
486         }
487 }
488
489 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
490 {
491         int r;
492
493         if (cap->flags)
494                 return -EINVAL;
495
496         switch (cap->cap) {
497         case KVM_CAP_S390_IRQCHIP:
498                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
499                 kvm->arch.use_irqchip = 1;
500                 r = 0;
501                 break;
502         case KVM_CAP_S390_USER_SIGP:
503                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
504                 kvm->arch.user_sigp = 1;
505                 r = 0;
506                 break;
507         case KVM_CAP_S390_VECTOR_REGISTERS:
508                 mutex_lock(&kvm->lock);
509                 if (kvm->created_vcpus) {
510                         r = -EBUSY;
511                 } else if (MACHINE_HAS_VX) {
512                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
513                         set_kvm_facility(kvm->arch.model.fac_list, 129);
514                         r = 0;
515                 } else
516                         r = -EINVAL;
517                 mutex_unlock(&kvm->lock);
518                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
519                          r ? "(not available)" : "(success)");
520                 break;
521         case KVM_CAP_S390_RI:
522                 r = -EINVAL;
523                 mutex_lock(&kvm->lock);
524                 if (kvm->created_vcpus) {
525                         r = -EBUSY;
526                 } else if (test_facility(64)) {
527                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
528                         set_kvm_facility(kvm->arch.model.fac_list, 64);
529                         r = 0;
530                 }
531                 mutex_unlock(&kvm->lock);
532                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
533                          r ? "(not available)" : "(success)");
534                 break;
535         case KVM_CAP_S390_USER_STSI:
536                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
537                 kvm->arch.user_stsi = 1;
538                 r = 0;
539                 break;
540         case KVM_CAP_S390_USER_INSTR0:
541                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
542                 kvm->arch.user_instr0 = 1;
543                 icpt_operexc_on_all_vcpus(kvm);
544                 r = 0;
545                 break;
546         default:
547                 r = -EINVAL;
548                 break;
549         }
550         return r;
551 }
552
553 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
554 {
555         int ret;
556
557         switch (attr->attr) {
558         case KVM_S390_VM_MEM_LIMIT_SIZE:
559                 ret = 0;
560                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
561                          kvm->arch.mem_limit);
562                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
563                         ret = -EFAULT;
564                 break;
565         default:
566                 ret = -ENXIO;
567                 break;
568         }
569         return ret;
570 }
571
/*
 * Change a memory control attribute of the VM.
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA: turn on CMMA; only possible while no VCPU
 *	has been created (-EBUSY otherwise), -ENXIO if sclp.has_cmma is unset.
 * KVM_S390_VM_MEM_CLR_CMMA: reset the CMMA state of the guest mm; -ENXIO
 *	if sclp.has_cmma is unset, -EINVAL if CMMA is not in use for this VM.
 * KVM_S390_VM_MEM_LIMIT_SIZE: read a new limit from attr->addr and replace
 *	the gmap with one created for that limit. -EINVAL for ucontrol VMs
 *	or a zero limit, -EFAULT on a failed user access, -E2BIG when the
 *	new limit exceeds an existing one, -EBUSY once VCPUs exist and
 *	-ENOMEM if the new gmap cannot be created.
 *
 * Any other attribute yields -ENXIO.
 */
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                /* stays -EBUSY when VCPUs already exist */
                if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                /* the reset runs under kvm->lock and inside an srcu read section */
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                /* an existing limit may only be lowered, never raised */
                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                /* swap the old gmap for the freshly created one */
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                /* logged regardless of whether the gmap was replaced */
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
654
655 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
656
657 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
658 {
659         struct kvm_vcpu *vcpu;
660         int i;
661
662         if (!test_kvm_facility(kvm, 76))
663                 return -EINVAL;
664
665         mutex_lock(&kvm->lock);
666         switch (attr->attr) {
667         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
668                 get_random_bytes(
669                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
670                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
671                 kvm->arch.crypto.aes_kw = 1;
672                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
673                 break;
674         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
675                 get_random_bytes(
676                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
677                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
678                 kvm->arch.crypto.dea_kw = 1;
679                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
680                 break;
681         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
682                 kvm->arch.crypto.aes_kw = 0;
683                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
684                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
685                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
686                 break;
687         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
688                 kvm->arch.crypto.dea_kw = 0;
689                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
690                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
691                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
692                 break;
693         default:
694                 mutex_unlock(&kvm->lock);
695                 return -ENXIO;
696         }
697
698         kvm_for_each_vcpu(i, vcpu, kvm) {
699                 kvm_s390_vcpu_crypto_setup(vcpu);
700                 exit_sie(vcpu);
701         }
702         mutex_unlock(&kvm->lock);
703         return 0;
704 }
705
706 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
707 {
708         u8 gtod_high;
709
710         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
711                                            sizeof(gtod_high)))
712                 return -EFAULT;
713
714         if (gtod_high != 0)
715                 return -EINVAL;
716         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
717
718         return 0;
719 }
720
721 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
722 {
723         u64 gtod;
724
725         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
726                 return -EFAULT;
727
728         kvm_s390_set_tod_clock(kvm, gtod);
729         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
730         return 0;
731 }
732
733 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
734 {
735         int ret;
736
737         if (attr->flags)
738                 return -EINVAL;
739
740         switch (attr->attr) {
741         case KVM_S390_VM_TOD_HIGH:
742                 ret = kvm_s390_set_tod_high(kvm, attr);
743                 break;
744         case KVM_S390_VM_TOD_LOW:
745                 ret = kvm_s390_set_tod_low(kvm, attr);
746                 break;
747         default:
748                 ret = -ENXIO;
749                 break;
750         }
751         return ret;
752 }
753
754 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
755 {
756         u8 gtod_high = 0;
757
758         if (copy_to_user((void __user *)attr->addr, &gtod_high,
759                                          sizeof(gtod_high)))
760                 return -EFAULT;
761         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
762
763         return 0;
764 }
765
766 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
767 {
768         u64 gtod;
769
770         gtod = kvm_s390_get_tod_clock_fast(kvm);
771         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
772                 return -EFAULT;
773         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
774
775         return 0;
776 }
777
778 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
779 {
780         int ret;
781
782         if (attr->flags)
783                 return -EINVAL;
784
785         switch (attr->attr) {
786         case KVM_S390_VM_TOD_HIGH:
787                 ret = kvm_s390_get_tod_high(kvm, attr);
788                 break;
789         case KVM_S390_VM_TOD_LOW:
790                 ret = kvm_s390_get_tod_low(kvm, attr);
791                 break;
792         default:
793                 ret = -ENXIO;
794                 break;
795         }
796         return ret;
797 }
798
799 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
800 {
801         struct kvm_s390_vm_cpu_processor *proc;
802         u16 lowest_ibc, unblocked_ibc;
803         int ret = 0;
804
805         mutex_lock(&kvm->lock);
806         if (kvm->created_vcpus) {
807                 ret = -EBUSY;
808                 goto out;
809         }
810         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
811         if (!proc) {
812                 ret = -ENOMEM;
813                 goto out;
814         }
815         if (!copy_from_user(proc, (void __user *)attr->addr,
816                             sizeof(*proc))) {
817                 kvm->arch.model.cpuid = proc->cpuid;
818                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
819                 unblocked_ibc = sclp.ibc & 0xfff;
820                 if (lowest_ibc && proc->ibc) {
821                         if (proc->ibc > unblocked_ibc)
822                                 kvm->arch.model.ibc = unblocked_ibc;
823                         else if (proc->ibc < lowest_ibc)
824                                 kvm->arch.model.ibc = lowest_ibc;
825                         else
826                                 kvm->arch.model.ibc = proc->ibc;
827                 }
828                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
829                        S390_ARCH_FAC_LIST_SIZE_BYTE);
830         } else
831                 ret = -EFAULT;
832         kfree(proc);
833 out:
834         mutex_unlock(&kvm->lock);
835         return ret;
836 }
837
838 static int kvm_s390_set_processor_feat(struct kvm *kvm,
839                                        struct kvm_device_attr *attr)
840 {
841         struct kvm_s390_vm_cpu_feat data;
842         int ret = -EBUSY;
843
844         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
845                 return -EFAULT;
846         if (!bitmap_subset((unsigned long *) data.feat,
847                            kvm_s390_available_cpu_feat,
848                            KVM_S390_VM_CPU_FEAT_NR_BITS))
849                 return -EINVAL;
850
851         mutex_lock(&kvm->lock);
852         if (!atomic_read(&kvm->online_vcpus)) {
853                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
854                             KVM_S390_VM_CPU_FEAT_NR_BITS);
855                 ret = 0;
856         }
857         mutex_unlock(&kvm->lock);
858         return ret;
859 }
860
/*
 * Configuring subfunctions is not implemented: always returns -ENXIO.
 *
 * Once kernel and hardware support it, the subfunctions have to be stored
 * in kvm->arch together with a note that user space configured them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
870
871 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
872 {
873         int ret = -ENXIO;
874
875         switch (attr->attr) {
876         case KVM_S390_VM_CPU_PROCESSOR:
877                 ret = kvm_s390_set_processor(kvm, attr);
878                 break;
879         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
880                 ret = kvm_s390_set_processor_feat(kvm, attr);
881                 break;
882         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
883                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
884                 break;
885         }
886         return ret;
887 }
888
889 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
890 {
891         struct kvm_s390_vm_cpu_processor *proc;
892         int ret = 0;
893
894         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
895         if (!proc) {
896                 ret = -ENOMEM;
897                 goto out;
898         }
899         proc->cpuid = kvm->arch.model.cpuid;
900         proc->ibc = kvm->arch.model.ibc;
901         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
902                S390_ARCH_FAC_LIST_SIZE_BYTE);
903         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
904                 ret = -EFAULT;
905         kfree(proc);
906 out:
907         return ret;
908 }
909
910 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
911 {
912         struct kvm_s390_vm_cpu_machine *mach;
913         int ret = 0;
914
915         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
916         if (!mach) {
917                 ret = -ENOMEM;
918                 goto out;
919         }
920         get_cpu_id((struct cpuid *) &mach->cpuid);
921         mach->ibc = sclp.ibc;
922         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
923                S390_ARCH_FAC_LIST_SIZE_BYTE);
924         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
925                sizeof(S390_lowcore.stfle_fac_list));
926         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
927                 ret = -EFAULT;
928         kfree(mach);
929 out:
930         return ret;
931 }
932
933 static int kvm_s390_get_processor_feat(struct kvm *kvm,
934                                        struct kvm_device_attr *attr)
935 {
936         struct kvm_s390_vm_cpu_feat data;
937
938         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
939                     KVM_S390_VM_CPU_FEAT_NR_BITS);
940         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
941                 return -EFAULT;
942         return 0;
943 }
944
945 static int kvm_s390_get_machine_feat(struct kvm *kvm,
946                                      struct kvm_device_attr *attr)
947 {
948         struct kvm_s390_vm_cpu_feat data;
949
950         bitmap_copy((unsigned long *) data.feat,
951                     kvm_s390_available_cpu_feat,
952                     KVM_S390_VM_CPU_FEAT_NR_BITS);
953         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
954                 return -EFAULT;
955         return 0;
956 }
957
/*
 * Querying configured subfunctions is not implemented: always returns
 * -ENXIO.
 *
 * Once subfunctions can actually be configured (kernel + hw support),
 * this has to check whether user space already set them and, if so, copy
 * them from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
968
969 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
970                                         struct kvm_device_attr *attr)
971 {
972         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
973             sizeof(struct kvm_s390_vm_cpu_subfunc)))
974                 return -EFAULT;
975         return 0;
976 }
977 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
978 {
979         int ret = -ENXIO;
980
981         switch (attr->attr) {
982         case KVM_S390_VM_CPU_PROCESSOR:
983                 ret = kvm_s390_get_processor(kvm, attr);
984                 break;
985         case KVM_S390_VM_CPU_MACHINE:
986                 ret = kvm_s390_get_machine(kvm, attr);
987                 break;
988         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
989                 ret = kvm_s390_get_processor_feat(kvm, attr);
990                 break;
991         case KVM_S390_VM_CPU_MACHINE_FEAT:
992                 ret = kvm_s390_get_machine_feat(kvm, attr);
993                 break;
994         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
995                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
996                 break;
997         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
998                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
999                 break;
1000         }
1001         return ret;
1002 }
1003
1004 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1005 {
1006         int ret;
1007
1008         switch (attr->group) {
1009         case KVM_S390_VM_MEM_CTRL:
1010                 ret = kvm_s390_set_mem_control(kvm, attr);
1011                 break;
1012         case KVM_S390_VM_TOD:
1013                 ret = kvm_s390_set_tod(kvm, attr);
1014                 break;
1015         case KVM_S390_VM_CPU_MODEL:
1016                 ret = kvm_s390_set_cpu_model(kvm, attr);
1017                 break;
1018         case KVM_S390_VM_CRYPTO:
1019                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1020                 break;
1021         default:
1022                 ret = -ENXIO;
1023                 break;
1024         }
1025
1026         return ret;
1027 }
1028
1029 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1030 {
1031         int ret;
1032
1033         switch (attr->group) {
1034         case KVM_S390_VM_MEM_CTRL:
1035                 ret = kvm_s390_get_mem_control(kvm, attr);
1036                 break;
1037         case KVM_S390_VM_TOD:
1038                 ret = kvm_s390_get_tod(kvm, attr);
1039                 break;
1040         case KVM_S390_VM_CPU_MODEL:
1041                 ret = kvm_s390_get_cpu_model(kvm, attr);
1042                 break;
1043         default:
1044                 ret = -ENXIO;
1045                 break;
1046         }
1047
1048         return ret;
1049 }
1050
1051 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1052 {
1053         int ret;
1054
1055         switch (attr->group) {
1056         case KVM_S390_VM_MEM_CTRL:
1057                 switch (attr->attr) {
1058                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1059                 case KVM_S390_VM_MEM_CLR_CMMA:
1060                         ret = sclp.has_cmma ? 0 : -ENXIO;
1061                         break;
1062                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1063                         ret = 0;
1064                         break;
1065                 default:
1066                         ret = -ENXIO;
1067                         break;
1068                 }
1069                 break;
1070         case KVM_S390_VM_TOD:
1071                 switch (attr->attr) {
1072                 case KVM_S390_VM_TOD_LOW:
1073                 case KVM_S390_VM_TOD_HIGH:
1074                         ret = 0;
1075                         break;
1076                 default:
1077                         ret = -ENXIO;
1078                         break;
1079                 }
1080                 break;
1081         case KVM_S390_VM_CPU_MODEL:
1082                 switch (attr->attr) {
1083                 case KVM_S390_VM_CPU_PROCESSOR:
1084                 case KVM_S390_VM_CPU_MACHINE:
1085                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1086                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1087                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1088                         ret = 0;
1089                         break;
1090                 /* configuring subfunctions is not supported yet */
1091                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1092                 default:
1093                         ret = -ENXIO;
1094                         break;
1095                 }
1096                 break;
1097         case KVM_S390_VM_CRYPTO:
1098                 switch (attr->attr) {
1099                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1100                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1101                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1102                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1103                         ret = 0;
1104                         break;
1105                 default:
1106                         ret = -ENXIO;
1107                         break;
1108                 }
1109                 break;
1110         default:
1111                 ret = -ENXIO;
1112                 break;
1113         }
1114
1115         return ret;
1116 }
1117
1118 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1119 {
1120         uint8_t *keys;
1121         uint64_t hva;
1122         int i, r = 0;
1123
1124         if (args->flags != 0)
1125                 return -EINVAL;
1126
1127         /* Is this guest using storage keys? */
1128         if (!mm_use_skey(current->mm))
1129                 return KVM_S390_GET_SKEYS_NONE;
1130
1131         /* Enforce sane limit on memory allocation */
1132         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1133                 return -EINVAL;
1134
1135         keys = kmalloc_array(args->count, sizeof(uint8_t),
1136                              GFP_KERNEL | __GFP_NOWARN);
1137         if (!keys)
1138                 keys = vmalloc(sizeof(uint8_t) * args->count);
1139         if (!keys)
1140                 return -ENOMEM;
1141
1142         down_read(&current->mm->mmap_sem);
1143         for (i = 0; i < args->count; i++) {
1144                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1145                 if (kvm_is_error_hva(hva)) {
1146                         r = -EFAULT;
1147                         break;
1148                 }
1149
1150                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1151                 if (r)
1152                         break;
1153         }
1154         up_read(&current->mm->mmap_sem);
1155
1156         if (!r) {
1157                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1158                                  sizeof(uint8_t) * args->count);
1159                 if (r)
1160                         r = -EFAULT;
1161         }
1162
1163         kvfree(keys);
1164         return r;
1165 }
1166
1167 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1168 {
1169         uint8_t *keys;
1170         uint64_t hva;
1171         int i, r = 0;
1172
1173         if (args->flags != 0)
1174                 return -EINVAL;
1175
1176         /* Enforce sane limit on memory allocation */
1177         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1178                 return -EINVAL;
1179
1180         keys = kmalloc_array(args->count, sizeof(uint8_t),
1181                              GFP_KERNEL | __GFP_NOWARN);
1182         if (!keys)
1183                 keys = vmalloc(sizeof(uint8_t) * args->count);
1184         if (!keys)
1185                 return -ENOMEM;
1186
1187         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1188                            sizeof(uint8_t) * args->count);
1189         if (r) {
1190                 r = -EFAULT;
1191                 goto out;
1192         }
1193
1194         /* Enable storage key handling for the guest */
1195         r = s390_enable_skey();
1196         if (r)
1197                 goto out;
1198
1199         down_read(&current->mm->mmap_sem);
1200         for (i = 0; i < args->count; i++) {
1201                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1202                 if (kvm_is_error_hva(hva)) {
1203                         r = -EFAULT;
1204                         break;
1205                 }
1206
1207                 /* Lowest order bit is reserved */
1208                 if (keys[i] & 0x01) {
1209                         r = -EINVAL;
1210                         break;
1211                 }
1212
1213                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1214                 if (r)
1215                         break;
1216         }
1217         up_read(&current->mm->mmap_sem);
1218 out:
1219         kvfree(keys);
1220         return r;
1221 }
1222
1223 long kvm_arch_vm_ioctl(struct file *filp,
1224                        unsigned int ioctl, unsigned long arg)
1225 {
1226         struct kvm *kvm = filp->private_data;
1227         void __user *argp = (void __user *)arg;
1228         struct kvm_device_attr attr;
1229         int r;
1230
1231         switch (ioctl) {
1232         case KVM_S390_INTERRUPT: {
1233                 struct kvm_s390_interrupt s390int;
1234
1235                 r = -EFAULT;
1236                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1237                         break;
1238                 r = kvm_s390_inject_vm(kvm, &s390int);
1239                 break;
1240         }
1241         case KVM_ENABLE_CAP: {
1242                 struct kvm_enable_cap cap;
1243                 r = -EFAULT;
1244                 if (copy_from_user(&cap, argp, sizeof(cap)))
1245                         break;
1246                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1247                 break;
1248         }
1249         case KVM_CREATE_IRQCHIP: {
1250                 struct kvm_irq_routing_entry routing;
1251
1252                 r = -EINVAL;
1253                 if (kvm->arch.use_irqchip) {
1254                         /* Set up dummy routing. */
1255                         memset(&routing, 0, sizeof(routing));
1256                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1257                 }
1258                 break;
1259         }
1260         case KVM_SET_DEVICE_ATTR: {
1261                 r = -EFAULT;
1262                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1263                         break;
1264                 r = kvm_s390_vm_set_attr(kvm, &attr);
1265                 break;
1266         }
1267         case KVM_GET_DEVICE_ATTR: {
1268                 r = -EFAULT;
1269                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1270                         break;
1271                 r = kvm_s390_vm_get_attr(kvm, &attr);
1272                 break;
1273         }
1274         case KVM_HAS_DEVICE_ATTR: {
1275                 r = -EFAULT;
1276                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1277                         break;
1278                 r = kvm_s390_vm_has_attr(kvm, &attr);
1279                 break;
1280         }
1281         case KVM_S390_GET_SKEYS: {
1282                 struct kvm_s390_skeys args;
1283
1284                 r = -EFAULT;
1285                 if (copy_from_user(&args, argp,
1286                                    sizeof(struct kvm_s390_skeys)))
1287                         break;
1288                 r = kvm_s390_get_skeys(kvm, &args);
1289                 break;
1290         }
1291         case KVM_S390_SET_SKEYS: {
1292                 struct kvm_s390_skeys args;
1293
1294                 r = -EFAULT;
1295                 if (copy_from_user(&args, argp,
1296                                    sizeof(struct kvm_s390_skeys)))
1297                         break;
1298                 r = kvm_s390_set_skeys(kvm, &args);
1299                 break;
1300         }
1301         default:
1302                 r = -ENOTTY;
1303         }
1304
1305         return r;
1306 }
1307
1308 static int kvm_s390_query_ap_config(u8 *config)
1309 {
1310         u32 fcn_code = 0x04000000UL;
1311         u32 cc = 0;
1312
1313         memset(config, 0, 128);
1314         asm volatile(
1315                 "lgr 0,%1\n"
1316                 "lgr 2,%2\n"
1317                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1318                 "0: ipm %0\n"
1319                 "srl %0,28\n"
1320                 "1:\n"
1321                 EX_TABLE(0b, 1b)
1322                 : "+r" (cc)
1323                 : "r" (fcn_code), "r" (config)
1324                 : "cc", "0", "2", "memory"
1325         );
1326
1327         return cc;
1328 }
1329
1330 static int kvm_s390_apxa_installed(void)
1331 {
1332         u8 config[128];
1333         int cc;
1334
1335         if (test_facility(12)) {
1336                 cc = kvm_s390_query_ap_config(config);
1337
1338                 if (cc)
1339                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1340                 else
1341                         return config[0] & 0x40;
1342         }
1343
1344         return 0;
1345 }
1346
1347 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1348 {
1349         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1350
1351         if (kvm_s390_apxa_installed())
1352                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1353         else
1354                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1355 }
1356
1357 static u64 kvm_s390_get_initial_cpuid(void)
1358 {
1359         struct cpuid cpuid;
1360
1361         get_cpu_id(&cpuid);
1362         cpuid.version = 0xff;
1363         return *((u64 *) &cpuid);
1364 }
1365
1366 static void kvm_s390_crypto_init(struct kvm *kvm)
1367 {
1368         if (!test_kvm_facility(kvm, 76))
1369                 return;
1370
1371         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1372         kvm_s390_set_crycb_format(kvm);
1373
1374         /* Enable AES/DEA protected key functions by default */
1375         kvm->arch.crypto.aes_kw = 1;
1376         kvm->arch.crypto.dea_kw = 1;
1377         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1378                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1379         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1380                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1381 }
1382
1383 static void sca_dispose(struct kvm *kvm)
1384 {
1385         if (kvm->arch.use_esca)
1386                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1387         else
1388                 free_page((unsigned long)(kvm->arch.sca));
1389         kvm->arch.sca = NULL;
1390 }
1391
1392 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1393 {
1394         gfp_t alloc_flags = GFP_KERNEL;
1395         int i, rc;
1396         char debug_name[16];
1397         static unsigned long sca_offset;
1398
1399         rc = -EINVAL;
1400 #ifdef CONFIG_KVM_S390_UCONTROL
1401         if (type & ~KVM_VM_S390_UCONTROL)
1402                 goto out_err;
1403         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1404                 goto out_err;
1405 #else
1406         if (type)
1407                 goto out_err;
1408 #endif
1409
1410         rc = s390_enable_sie();
1411         if (rc)
1412                 goto out_err;
1413
1414         rc = -ENOMEM;
1415
1416         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1417
1418         kvm->arch.use_esca = 0; /* start with basic SCA */
1419         if (!sclp.has_64bscao)
1420                 alloc_flags |= GFP_DMA;
1421         rwlock_init(&kvm->arch.sca_lock);
1422         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1423         if (!kvm->arch.sca)
1424                 goto out_err;
1425         spin_lock(&kvm_lock);
1426         sca_offset += 16;
1427         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1428                 sca_offset = 0;
1429         kvm->arch.sca = (struct bsca_block *)
1430                         ((char *) kvm->arch.sca + sca_offset);
1431         spin_unlock(&kvm_lock);
1432
1433         sprintf(debug_name, "kvm-%u", current->pid);
1434
1435         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1436         if (!kvm->arch.dbf)
1437                 goto out_err;
1438
1439         kvm->arch.sie_page2 =
1440              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1441         if (!kvm->arch.sie_page2)
1442                 goto out_err;
1443
1444         /* Populate the facility mask initially. */
1445         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1446                sizeof(S390_lowcore.stfle_fac_list));
1447         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1448                 if (i < kvm_s390_fac_list_mask_size())
1449                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1450                 else
1451                         kvm->arch.model.fac_mask[i] = 0UL;
1452         }
1453
1454         /* Populate the facility list initially. */
1455         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1456         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1457                S390_ARCH_FAC_LIST_SIZE_BYTE);
1458
1459         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1460         set_kvm_facility(kvm->arch.model.fac_list, 74);
1461
1462         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1463         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1464
1465         kvm_s390_crypto_init(kvm);
1466
1467         spin_lock_init(&kvm->arch.float_int.lock);
1468         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1469                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1470         init_waitqueue_head(&kvm->arch.ipte_wq);
1471         mutex_init(&kvm->arch.ipte_mutex);
1472
1473         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1474         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1475
1476         if (type & KVM_VM_S390_UCONTROL) {
1477                 kvm->arch.gmap = NULL;
1478                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1479         } else {
1480                 if (sclp.hamax == U64_MAX)
1481                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1482                 else
1483                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1484                                                     sclp.hamax + 1);
1485                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1486                 if (!kvm->arch.gmap)
1487                         goto out_err;
1488                 kvm->arch.gmap->private = kvm;
1489                 kvm->arch.gmap->pfault_enabled = 0;
1490         }
1491
1492         kvm->arch.css_support = 0;
1493         kvm->arch.use_irqchip = 0;
1494         kvm->arch.epoch = 0;
1495
1496         spin_lock_init(&kvm->arch.start_stop_lock);
1497         kvm_s390_vsie_init(kvm);
1498         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1499
1500         return 0;
1501 out_err:
1502         free_page((unsigned long)kvm->arch.sie_page2);
1503         debug_unregister(kvm->arch.dbf);
1504         sca_dispose(kvm);
1505         KVM_EVENT(3, "creation of vm failed: %d", rc);
1506         return rc;
1507 }
1508
/* This implementation has no per-VCPU debugfs entries: always false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1513
/* Nothing to create for a VCPU in debugfs: always succeeds with 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
1518
1519 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1520 {
1521         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1522         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1523         kvm_s390_clear_local_irqs(vcpu);
1524         kvm_clear_async_pf_completion_queue(vcpu);
1525         if (!kvm_is_ucontrol(vcpu->kvm))
1526                 sca_del_vcpu(vcpu);
1527
1528         if (kvm_is_ucontrol(vcpu->kvm))
1529                 gmap_remove(vcpu->arch.gmap);
1530
1531         if (vcpu->kvm->arch.use_cmma)
1532                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1533         free_page((unsigned long)(vcpu->arch.sie_block));
1534
1535         kvm_vcpu_uninit(vcpu);
1536         kmem_cache_free(kvm_vcpu_cache, vcpu);
1537 }
1538
1539 static void kvm_free_vcpus(struct kvm *kvm)
1540 {
1541         unsigned int i;
1542         struct kvm_vcpu *vcpu;
1543
1544         kvm_for_each_vcpu(i, vcpu, kvm)
1545                 kvm_arch_vcpu_destroy(vcpu);
1546
1547         mutex_lock(&kvm->lock);
1548         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1549                 kvm->vcpus[i] = NULL;
1550
1551         atomic_set(&kvm->online_vcpus, 0);
1552         mutex_unlock(&kvm->lock);
1553 }
1554
1555 void kvm_arch_destroy_vm(struct kvm *kvm)
1556 {
1557         kvm_free_vcpus(kvm);
1558         sca_dispose(kvm);
1559         debug_unregister(kvm->arch.dbf);
1560         free_page((unsigned long)kvm->arch.sie_page2);
1561         if (!kvm_is_ucontrol(kvm))
1562                 gmap_remove(kvm->arch.gmap);
1563         kvm_s390_destroy_adapters(kvm);
1564         kvm_s390_clear_float_irqs(kvm);
1565         kvm_s390_vsie_destroy(kvm);
1566         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1567 }
1568
1569 /* Section: vcpu related */
1570 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1571 {
1572         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1573         if (!vcpu->arch.gmap)
1574                 return -ENOMEM;
1575         vcpu->arch.gmap->private = vcpu->kvm;
1576
1577         return 0;
1578 }
1579
1580 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1581 {
1582         if (!kvm_s390_use_sca_entries())
1583                 return;
1584         read_lock(&vcpu->kvm->arch.sca_lock);
1585         if (vcpu->kvm->arch.use_esca) {
1586                 struct esca_block *sca = vcpu->kvm->arch.sca;
1587
1588                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1589                 sca->cpu[vcpu->vcpu_id].sda = 0;
1590         } else {
1591                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1592
1593                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1594                 sca->cpu[vcpu->vcpu_id].sda = 0;
1595         }
1596         read_unlock(&vcpu->kvm->arch.sca_lock);
1597 }
1598
1599 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1600 {
1601         if (!kvm_s390_use_sca_entries()) {
1602                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1603
1604                 /* we still need the basic sca for the ipte control */
1605                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1606                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1607                 return;
1608         }
1609         read_lock(&vcpu->kvm->arch.sca_lock);
1610         if (vcpu->kvm->arch.use_esca) {
1611                 struct esca_block *sca = vcpu->kvm->arch.sca;
1612
1613                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1614                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1615                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1616                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1618         } else {
1619                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1620
1621                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1622                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1623                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1624                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1625         }
1626         read_unlock(&vcpu->kvm->arch.sca_lock);
1627 }
1628
1629 /* Basic SCA to Extended SCA data copy routines */
1630 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1631 {
1632         d->sda = s->sda;
1633         d->sigp_ctrl.c = s->sigp_ctrl.c;
1634         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1635 }
1636
1637 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1638 {
1639         int i;
1640
1641         d->ipte_control = s->ipte_control;
1642         d->mcn[0] = s->mcn;
1643         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1644                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1645 }
1646
1647 static int sca_switch_to_extended(struct kvm *kvm)
1648 {
1649         struct bsca_block *old_sca = kvm->arch.sca;
1650         struct esca_block *new_sca;
1651         struct kvm_vcpu *vcpu;
1652         unsigned int vcpu_idx;
1653         u32 scaol, scaoh;
1654
1655         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1656         if (!new_sca)
1657                 return -ENOMEM;
1658
1659         scaoh = (u32)((u64)(new_sca) >> 32);
1660         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1661
1662         kvm_s390_vcpu_block_all(kvm);
1663         write_lock(&kvm->arch.sca_lock);
1664
1665         sca_copy_b_to_e(new_sca, old_sca);
1666
1667         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1668                 vcpu->arch.sie_block->scaoh = scaoh;
1669                 vcpu->arch.sie_block->scaol = scaol;
1670                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1671         }
1672         kvm->arch.sca = new_sca;
1673         kvm->arch.use_esca = 1;
1674
1675         write_unlock(&kvm->arch.sca_lock);
1676         kvm_s390_vcpu_unblock_all(kvm);
1677
1678         free_page((unsigned long)old_sca);
1679
1680         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1681                  old_sca, kvm->arch.sca);
1682         return 0;
1683 }
1684
1685 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1686 {
1687         int rc;
1688
1689         if (!kvm_s390_use_sca_entries()) {
1690                 if (id < KVM_MAX_VCPUS)
1691                         return true;
1692                 return false;
1693         }
1694         if (id < KVM_S390_BSCA_CPU_SLOTS)
1695                 return true;
1696         if (!sclp.has_esca || !sclp.has_64bscao)
1697                 return false;
1698
1699         mutex_lock(&kvm->lock);
1700         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1701         mutex_unlock(&kvm->lock);
1702
1703         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1704 }
1705
1706 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1707 {
1708         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1709         kvm_clear_async_pf_completion_queue(vcpu);
1710         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1711                                     KVM_SYNC_GPRS |
1712                                     KVM_SYNC_ACRS |
1713                                     KVM_SYNC_CRS |
1714                                     KVM_SYNC_ARCH0 |
1715                                     KVM_SYNC_PFAULT;
1716         kvm_s390_set_prefix(vcpu, 0);
1717         if (test_kvm_facility(vcpu->kvm, 64))
1718                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1719         if (test_kvm_facility(vcpu->kvm, 82))
1720                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
1721         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1722          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1723          */
1724         if (MACHINE_HAS_VX)
1725                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1726         else
1727                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1728
1729         if (kvm_is_ucontrol(vcpu->kvm))
1730                 return __kvm_ucontrol_vcpu_init(vcpu);
1731
1732         return 0;
1733 }
1734
1735 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1736 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1737 {
1738         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1739         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1740         vcpu->arch.cputm_start = get_tod_clock_fast();
1741         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1742 }
1743
1744 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1745 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1746 {
1747         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1748         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1749         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1750         vcpu->arch.cputm_start = 0;
1751         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1752 }
1753
1754 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1755 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1756 {
1757         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1758         vcpu->arch.cputm_enabled = true;
1759         __start_cpu_timer_accounting(vcpu);
1760 }
1761
1762 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1763 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1764 {
1765         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1766         __stop_cpu_timer_accounting(vcpu);
1767         vcpu->arch.cputm_enabled = false;
1768 }
1769
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1776
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1783
1784 /* set the cpu timer - may only be called from the VCPU thread itself */
1785 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1786 {
1787         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1788         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1789         if (vcpu->arch.cputm_enabled)
1790                 vcpu->arch.cputm_start = get_tod_clock_fast();
1791         vcpu->arch.sie_block->cputm = cputm;
1792         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1793         preempt_enable();
1794 }
1795
1796 /* update and get the cpu timer - can also be called from other VCPU threads */
1797 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1798 {
1799         unsigned int seq;
1800         __u64 value;
1801
1802         if (unlikely(!vcpu->arch.cputm_enabled))
1803                 return vcpu->arch.sie_block->cputm;
1804
1805         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1806         do {
1807                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1808                 /*
1809                  * If the writer would ever execute a read in the critical
1810                  * section, e.g. in irq context, we have a deadlock.
1811                  */
1812                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1813                 value = vcpu->arch.sie_block->cputm;
1814                 /* if cputm_start is 0, accounting is being started/stopped */
1815                 if (likely(vcpu->arch.cputm_start))
1816                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1817         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1818         preempt_enable();
1819         return value;
1820 }
1821
1822 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1823 {
1824         /* Save host register state */
1825         save_fpu_regs();
1826         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1827         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1828
1829         if (MACHINE_HAS_VX)
1830                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1831         else
1832                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1833         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1834         if (test_fp_ctl(current->thread.fpu.fpc))
1835                 /* User space provided an invalid FPC, let's clear it */
1836                 current->thread.fpu.fpc = 0;
1837         save_access_regs(vcpu->arch.host_acrs);
1838         restore_access_regs(vcpu->run->s.regs.acrs);
1839         gmap_enable(vcpu->arch.enabled_gmap);
1840         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1841         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1842                 __start_cpu_timer_accounting(vcpu);
1843         vcpu->cpu = cpu;
1844 }
1845
1846 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1847 {
1848         vcpu->cpu = -1;
1849         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1850                 __stop_cpu_timer_accounting(vcpu);
1851         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1852         vcpu->arch.enabled_gmap = gmap_get_enabled();
1853         gmap_disable(vcpu->arch.enabled_gmap);
1854
1855         /* Save guest register state */
1856         save_fpu_regs();
1857         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1858
1859         /* Restore host register state */
1860         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1861         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1862
1863         save_access_regs(vcpu->run->s.regs.acrs);
1864         restore_access_regs(vcpu->arch.host_acrs);
1865 }
1866
1867 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1868 {
1869         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1870         vcpu->arch.sie_block->gpsw.mask = 0UL;
1871         vcpu->arch.sie_block->gpsw.addr = 0UL;
1872         kvm_s390_set_prefix(vcpu, 0);
1873         kvm_s390_set_cpu_timer(vcpu, 0);
1874         vcpu->arch.sie_block->ckc       = 0UL;
1875         vcpu->arch.sie_block->todpr     = 0;
1876         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1877         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1878         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1879         /* make sure the new fpc will be lazily loaded */
1880         save_fpu_regs();
1881         current->thread.fpu.fpc = 0;
1882         vcpu->arch.sie_block->gbea = 1;
1883         vcpu->arch.sie_block->pp = 0;
1884         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
1885         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1886         kvm_clear_async_pf_completion_queue(vcpu);
1887         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1888                 kvm_s390_vcpu_stop(vcpu);
1889         kvm_s390_clear_local_irqs(vcpu);
1890 }
1891
1892 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1893 {
1894         mutex_lock(&vcpu->kvm->lock);
1895         preempt_disable();
1896         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1897         preempt_enable();
1898         mutex_unlock(&vcpu->kvm->lock);
1899         if (!kvm_is_ucontrol(vcpu->kvm)) {
1900                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1901                 sca_add_vcpu(vcpu);
1902         }
1903         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1904                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1905         /* make vcpu_load load the right gmap on the first trigger */
1906         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1907 }
1908
1909 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1910 {
1911         if (!test_kvm_facility(vcpu->kvm, 76))
1912                 return;
1913
1914         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1915
1916         if (vcpu->kvm->arch.crypto.aes_kw)
1917                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1918         if (vcpu->kvm->arch.crypto.dea_kw)
1919                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1920
1921         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1922 }
1923
1924 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1925 {
1926         free_page(vcpu->arch.sie_block->cbrlo);
1927         vcpu->arch.sie_block->cbrlo = 0;
1928 }
1929
1930 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1931 {
1932         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1933         if (!vcpu->arch.sie_block->cbrlo)
1934                 return -ENOMEM;
1935
1936         vcpu->arch.sie_block->ecb2 |= 0x80;
1937         vcpu->arch.sie_block->ecb2 &= ~0x08;
1938         return 0;
1939 }
1940
1941 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1942 {
1943         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1944
1945         vcpu->arch.sie_block->ibc = model->ibc;
1946         if (test_kvm_facility(vcpu->kvm, 7))
1947                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1948 }
1949
1950 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1951 {
1952         int rc = 0;
1953
1954         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1955                                                     CPUSTAT_SM |
1956                                                     CPUSTAT_STOPPED);
1957
1958         if (test_kvm_facility(vcpu->kvm, 78))
1959                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1960         else if (test_kvm_facility(vcpu->kvm, 8))
1961                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1962
1963         kvm_s390_vcpu_setup_model(vcpu);
1964
1965         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1966         if (MACHINE_HAS_ESOP)
1967                 vcpu->arch.sie_block->ecb |= 0x02;
1968         if (test_kvm_facility(vcpu->kvm, 9))
1969                 vcpu->arch.sie_block->ecb |= 0x04;
1970         if (test_kvm_facility(vcpu->kvm, 73))
1971                 vcpu->arch.sie_block->ecb |= 0x10;
1972
1973         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1974                 vcpu->arch.sie_block->ecb2 |= 0x08;
1975         vcpu->arch.sie_block->eca = 0x1002000U;
1976         if (sclp.has_cei)
1977                 vcpu->arch.sie_block->eca |= 0x80000000U;
1978         if (sclp.has_ib)
1979                 vcpu->arch.sie_block->eca |= 0x40000000U;
1980         if (sclp.has_siif)
1981                 vcpu->arch.sie_block->eca |= 1;
1982         if (sclp.has_sigpif)
1983                 vcpu->arch.sie_block->eca |= 0x10000000U;
1984         if (test_kvm_facility(vcpu->kvm, 129)) {
1985                 vcpu->arch.sie_block->eca |= 0x00020000;
1986                 vcpu->arch.sie_block->ecd |= 0x20000000;
1987         }
1988         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1989         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1990
1991         if (vcpu->kvm->arch.use_cmma) {
1992                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1993                 if (rc)
1994                         return rc;
1995         }
1996         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1997         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1998
1999         kvm_s390_vcpu_crypto_setup(vcpu);
2000
2001         return rc;
2002 }
2003
2004 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2005                                       unsigned int id)
2006 {
2007         struct kvm_vcpu *vcpu;
2008         struct sie_page *sie_page;
2009         int rc = -EINVAL;
2010
2011         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2012                 goto out;
2013
2014         rc = -ENOMEM;
2015
2016         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2017         if (!vcpu)
2018                 goto out;
2019
2020         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2021         if (!sie_page)
2022                 goto out_free_cpu;
2023
2024         vcpu->arch.sie_block = &sie_page->sie_block;
2025         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2026
2027         /* the real guest size will always be smaller than msl */
2028         vcpu->arch.sie_block->mso = 0;
2029         vcpu->arch.sie_block->msl = sclp.hamax;
2030
2031         vcpu->arch.sie_block->icpua = id;
2032         spin_lock_init(&vcpu->arch.local_int.lock);
2033         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2034         vcpu->arch.local_int.wq = &vcpu->wq;
2035         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2036         seqcount_init(&vcpu->arch.cputm_seqcount);
2037
2038         rc = kvm_vcpu_init(vcpu, kvm, id);
2039         if (rc)
2040                 goto out_free_sie_block;
2041         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2042                  vcpu->arch.sie_block);
2043         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2044
2045         return vcpu;
2046 out_free_sie_block:
2047         free_page((unsigned long)(vcpu->arch.sie_block));
2048 out_free_cpu:
2049         kmem_cache_free(kvm_vcpu_cache, vcpu);
2050 out:
2051         return ERR_PTR(rc);
2052 }
2053
/* A VCPU is runnable when kvm_s390_vcpu_has_irq(vcpu, 0) reports an irq. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

        return has_irq;
}
2058
2059 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2060 {
2061         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2062         exit_sie(vcpu);
2063 }
2064
2065 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2066 {
2067         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2068 }
2069
2070 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2071 {
2072         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2073         exit_sie(vcpu);
2074 }
2075
2076 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2077 {
2078         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2079 }
2080
2081 /*
2082  * Kick a guest cpu out of SIE and wait until SIE is not running.
2083  * If the CPU is not running (e.g. waiting as idle) the function will
2084  * return immediately. */
2085 void exit_sie(struct kvm_vcpu *vcpu)
2086 {
2087         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2088         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2089                 cpu_relax();
2090 }
2091
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request @req is posted first, then the VCPU is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        /* post the request before forcing the exit */
        kvm_make_request(req, vcpu);
        kvm_s390_vcpu_request(vcpu);
}
2098
2099 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2100                               unsigned long end)
2101 {
2102         struct kvm *kvm = gmap->private;
2103         struct kvm_vcpu *vcpu;
2104         unsigned long prefix;
2105         int i;
2106
2107         if (gmap_is_shadow(gmap))
2108                 return;
2109         if (start >= 1UL << 31)
2110                 /* We are only interested in prefix pages */
2111                 return;
2112         kvm_for_each_vcpu(i, vcpu, kvm) {
2113                 /* match against both prefix pages */
2114                 prefix = kvm_s390_get_prefix(vcpu);
2115                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2116                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2117                                    start, end);
2118                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2119                 }
2120         }
2121 }
2122
/*
 * Stub required by kvm common code, which refers to this function but
 * never calls it; reaching it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();

        /* keeps the non-void signature satisfied after BUG() */
        return 0;
}
2129
2130 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2131                                            struct kvm_one_reg *reg)
2132 {
2133         int r = -EINVAL;
2134
2135         switch (reg->id) {
2136         case KVM_REG_S390_TODPR:
2137                 r = put_user(vcpu->arch.sie_block->todpr,
2138                              (u32 __user *)reg->addr);
2139                 break;
2140         case KVM_REG_S390_EPOCHDIFF:
2141                 r = put_user(vcpu->arch.sie_block->epoch,
2142                              (u64 __user *)reg->addr);
2143                 break;
2144         case KVM_REG_S390_CPU_TIMER:
2145                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2146                              (u64 __user *)reg->addr);
2147                 break;
2148         case KVM_REG_S390_CLOCK_COMP:
2149                 r = put_user(vcpu->arch.sie_block->ckc,
2150                              (u64 __user *)reg->addr);
2151                 break;
2152         case KVM_REG_S390_PFTOKEN:
2153                 r = put_user(vcpu->arch.pfault_token,
2154                              (u64 __user *)reg->addr);
2155                 break;
2156         case KVM_REG_S390_PFCOMPARE:
2157                 r = put_user(vcpu->arch.pfault_compare,
2158                              (u64 __user *)reg->addr);
2159                 break;
2160         case KVM_REG_S390_PFSELECT:
2161                 r = put_user(vcpu->arch.pfault_select,
2162                              (u64 __user *)reg->addr);
2163                 break;
2164         case KVM_REG_S390_PP:
2165                 r = put_user(vcpu->arch.sie_block->pp,
2166                              (u64 __user *)reg->addr);
2167                 break;
2168         case KVM_REG_S390_GBEA:
2169                 r = put_user(vcpu->arch.sie_block->gbea,
2170                              (u64 __user *)reg->addr);
2171                 break;
2172         default:
2173                 break;
2174         }
2175
2176         return r;
2177 }
2178
2179 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2180                                            struct kvm_one_reg *reg)
2181 {
2182         int r = -EINVAL;
2183         __u64 val;
2184
2185         switch (reg->id) {
2186         case KVM_REG_S390_TODPR:
2187                 r = get_user(vcpu->arch.sie_block->todpr,
2188                              (u32 __user *)reg->addr);
2189                 break;
2190         case KVM_REG_S390_EPOCHDIFF:
2191                 r = get_user(vcpu->arch.sie_block->epoch,
2192                              (u64 __user *)reg->addr);
2193                 break;
2194         case KVM_REG_S390_CPU_TIMER:
2195                 r = get_user(val, (u64 __user *)reg->addr);
2196                 if (!r)
2197                         kvm_s390_set_cpu_timer(vcpu, val);
2198                 break;
2199         case KVM_REG_S390_CLOCK_COMP:
2200                 r = get_user(vcpu->arch.sie_block->ckc,
2201                              (u64 __user *)reg->addr);
2202                 break;
2203         case KVM_REG_S390_PFTOKEN:
2204                 r = get_user(vcpu->arch.pfault_token,
2205                              (u64 __user *)reg->addr);
2206                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2207                         kvm_clear_async_pf_completion_queue(vcpu);
2208                 break;
2209         case KVM_REG_S390_PFCOMPARE:
2210                 r = get_user(vcpu->arch.pfault_compare,
2211                              (u64 __user *)reg->addr);
2212                 break;
2213         case KVM_REG_S390_PFSELECT:
2214                 r = get_user(vcpu->arch.pfault_select,
2215                              (u64 __user *)reg->addr);
2216                 break;
2217         case KVM_REG_S390_PP:
2218                 r = get_user(vcpu->arch.sie_block->pp,
2219                              (u64 __user *)reg->addr);
2220                 break;
2221         case KVM_REG_S390_GBEA:
2222                 r = get_user(vcpu->arch.sie_block->gbea,
2223                              (u64 __user *)reg->addr);
2224                 break;
2225         default:
2226                 break;
2227         }
2228
2229         return r;
2230 }
2231
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);

        /* the reset itself cannot fail */
        return 0;
}
2237
2238 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2239 {
2240         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2241         return 0;
2242 }
2243
2244 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2245 {
2246         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2247         return 0;
2248 }
2249
2250 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2251                                   struct kvm_sregs *sregs)
2252 {
2253         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2254         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2255         restore_access_regs(vcpu->run->s.regs.acrs);
2256         return 0;
2257 }
2258
2259 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2260                                   struct kvm_sregs *sregs)
2261 {
2262         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2263         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2264         return 0;
2265 }
2266
2267 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2268 {
2269         /* make sure the new values will be lazily loaded */
2270         save_fpu_regs();
2271         if (test_fp_ctl(fpu->fpc))
2272                 return -EINVAL;
2273         current->thread.fpu.fpc = fpu->fpc;
2274         if (MACHINE_HAS_VX)
2275                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2276                                  (freg_t *) fpu->fprs);
2277         else
2278                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2279         return 0;
2280 }
2281
2282 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2283 {
2284         /* make sure we have the latest values */
2285         save_fpu_regs();
2286         if (MACHINE_HAS_VX)
2287                 convert_vx_to_fp((freg_t *) fpu->fprs,
2288                                  (__vector128 *) vcpu->run->s.regs.vrs);
2289         else
2290                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2291         fpu->fpc = current->thread.fpu.fpc;
2292         return 0;
2293 }
2294
2295 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2296 {
2297         int rc = 0;
2298
2299         if (!is_vcpu_stopped(vcpu))
2300                 rc = -EBUSY;
2301         else {
2302                 vcpu->run->psw_mask = psw.mask;
2303                 vcpu->run->psw_addr = psw.addr;
2304         }
2305         return rc;
2306 }
2307
2308 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2309                                   struct kvm_translation *tr)
2310 {
2311         return -EINVAL; /* not implemented yet */
2312 }
2313
2314 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2315                               KVM_GUESTDBG_USE_HW_BP | \
2316                               KVM_GUESTDBG_ENABLE)
2317
2318 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2319                                         struct kvm_guest_debug *dbg)
2320 {
2321         int rc = 0;
2322
2323         vcpu->guest_debug = 0;
2324         kvm_s390_clear_bp_data(vcpu);
2325
2326         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2327                 return -EINVAL;
2328         if (!sclp.has_gpere)
2329                 return -EINVAL;
2330
2331         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2332                 vcpu->guest_debug = dbg->control;
2333                 /* enforce guest PER */
2334                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2335
2336                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2337                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2338         } else {
2339                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2340                 vcpu->arch.guestdbg.last_bp = 0;
2341         }
2342
2343         if (rc) {
2344                 vcpu->guest_debug = 0;
2345                 kvm_s390_clear_bp_data(vcpu);
2346                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2347         }
2348
2349         return rc;
2350 }
2351
2352 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2353                                     struct kvm_mp_state *mp_state)
2354 {
2355         /* CHECK_STOP and LOAD are not supported yet */
2356         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2357                                        KVM_MP_STATE_OPERATING;
2358 }
2359
2360 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2361                                     struct kvm_mp_state *mp_state)
2362 {
2363         int rc = 0;
2364
2365         /* user space knows about this interface - let it control the state */
2366         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2367
2368         switch (mp_state->mp_state) {
2369         case KVM_MP_STATE_STOPPED:
2370                 kvm_s390_vcpu_stop(vcpu);
2371                 break;
2372         case KVM_MP_STATE_OPERATING:
2373                 kvm_s390_vcpu_start(vcpu);
2374                 break;
2375         case KVM_MP_STATE_LOAD:
2376         case KVM_MP_STATE_CHECK_STOP:
2377                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2378         default:
2379                 rc = -ENXIO;
2380         }
2381
2382         return rc;
2383 }
2384
2385 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2386 {
2387         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2388 }
2389
2390 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2391 {
2392 retry:
2393         kvm_s390_vcpu_request_handled(vcpu);
2394         if (!vcpu->requests)
2395                 return 0;
2396         /*
2397          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2398          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2399          * This ensures that the ipte instruction for this request has
2400          * already finished. We might race against a second unmapper that
2401          * wants to set the blocking bit. Lets just retry the request loop.
2402          */
2403         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2404                 int rc;
2405                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2406                                           kvm_s390_get_prefix(vcpu),
2407                                           PAGE_SIZE * 2, PROT_WRITE);
2408                 if (rc) {
2409                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2410                         return rc;
2411                 }
2412                 goto retry;
2413         }
2414
2415         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2416                 vcpu->arch.sie_block->ihcpu = 0xffff;
2417                 goto retry;
2418         }
2419
2420         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2421                 if (!ibs_enabled(vcpu)) {
2422                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2423                         atomic_or(CPUSTAT_IBS,
2424                                         &vcpu->arch.sie_block->cpuflags);
2425                 }
2426                 goto retry;
2427         }
2428
2429         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2430                 if (ibs_enabled(vcpu)) {
2431                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2432                         atomic_andnot(CPUSTAT_IBS,
2433                                           &vcpu->arch.sie_block->cpuflags);
2434                 }
2435                 goto retry;
2436         }
2437
2438         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2439                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2440                 goto retry;
2441         }
2442
2443         /* nothing to do, just clear the request */
2444         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2445
2446         return 0;
2447 }
2448
2449 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2450 {
2451         struct kvm_vcpu *vcpu;
2452         int i;
2453
2454         mutex_lock(&kvm->lock);
2455         preempt_disable();
2456         kvm->arch.epoch = tod - get_tod_clock();
2457         kvm_s390_vcpu_block_all(kvm);
2458         kvm_for_each_vcpu(i, vcpu, kvm)
2459                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2460         kvm_s390_vcpu_unblock_all(kvm);
2461         preempt_enable();
2462         mutex_unlock(&kvm->lock);
2463 }
2464
2465 /**
2466  * kvm_arch_fault_in_page - fault-in guest page if necessary
2467  * @vcpu: The corresponding virtual cpu
2468  * @gpa: Guest physical address
2469  * @writable: Whether the page should be writable or not
2470  *
2471  * Make sure that a guest page has been faulted-in on the host.
2472  *
2473  * Return: Zero on success, negative error code otherwise.
2474  */
2475 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2476 {
2477         return gmap_fault(vcpu->arch.gmap, gpa,
2478                           writable ? FAULT_FLAG_WRITE : 0);
2479 }
2480
2481 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2482                                       unsigned long token)
2483 {
2484         struct kvm_s390_interrupt inti;
2485         struct kvm_s390_irq irq;
2486
2487         if (start_token) {
2488                 irq.u.ext.ext_params2 = token;
2489                 irq.type = KVM_S390_INT_PFAULT_INIT;
2490                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2491         } else {
2492                 inti.type = KVM_S390_INT_PFAULT_DONE;
2493                 inti.parm64 = token;
2494                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2495         }
2496 }
2497
2498 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2499                                      struct kvm_async_pf *work)
2500 {
2501         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2502         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2503 }
2504
2505 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2506                                  struct kvm_async_pf *work)
2507 {
2508         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2509         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2510 }
2511
/*
 * Async-pf "page ready" hook. Intentionally empty: on s390 the page is
 * always injected directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
}
2517
2518 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2519 {
2520         /*
2521          * s390 will always inject the page directly,
2522          * but we still want check_async_completion to cleanup
2523          */
2524         return true;
2525 }
2526
2527 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2528 {
2529         hva_t hva;
2530         struct kvm_arch_async_pf arch;
2531         int rc;
2532
2533         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2534                 return 0;
2535         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2536             vcpu->arch.pfault_compare)
2537                 return 0;
2538         if (psw_extint_disabled(vcpu))
2539                 return 0;
2540         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2541                 return 0;
2542         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2543                 return 0;
2544         if (!vcpu->arch.gmap->pfault_enabled)
2545                 return 0;
2546
2547         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2548         hva += current->thread.gmap_addr & ~PAGE_MASK;
2549         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2550                 return 0;
2551
2552         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2553         return rc;
2554 }
2555
2556 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2557 {
2558         int rc, cpuflags;
2559
2560         /*
2561          * On s390 notifications for arriving pages will be delivered directly
2562          * to the guest but the house keeping for completed pfaults is
2563          * handled outside the worker.
2564          */
2565         kvm_check_async_pf_completion(vcpu);
2566
2567         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2568         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2569
2570         if (need_resched())
2571                 schedule();
2572
2573         if (test_cpu_flag(CIF_MCCK_PENDING))
2574                 s390_handle_mcck();
2575
2576         if (!kvm_is_ucontrol(vcpu->kvm)) {
2577                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2578                 if (rc)
2579                         return rc;
2580         }
2581
2582         rc = kvm_s390_handle_requests(vcpu);
2583         if (rc)
2584                 return rc;
2585
2586         if (guestdbg_enabled(vcpu)) {
2587                 kvm_s390_backup_guest_per_regs(vcpu);
2588                 kvm_s390_patch_guest_per_regs(vcpu);
2589         }
2590
2591         vcpu->arch.sie_block->icptcode = 0;
2592         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2593         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2594         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2595
2596         return 0;
2597 }
2598
2599 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2600 {
2601         struct kvm_s390_pgm_info pgm_info = {
2602                 .code = PGM_ADDRESSING,
2603         };
2604         u8 opcode, ilen;
2605         int rc;
2606
2607         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2608         trace_kvm_s390_sie_fault(vcpu);
2609
2610         /*
2611          * We want to inject an addressing exception, which is defined as a
2612          * suppressing or terminating exception. However, since we came here
2613          * by a DAT access exception, the PSW still points to the faulting
2614          * instruction since DAT exceptions are nullifying. So we've got
2615          * to look up the current opcode to get the length of the instruction
2616          * to be able to forward the PSW.
2617          */
2618         rc = read_guest_instr(vcpu, &opcode, 1);
2619         ilen = insn_length(opcode);
2620         if (rc < 0) {
2621                 return rc;
2622         } else if (rc) {
2623                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2624                  * Forward by arbitrary ilc, injection will take care of
2625                  * nullification if necessary.
2626                  */
2627                 pgm_info = vcpu->arch.pgm;
2628                 ilen = 4;
2629         }
2630         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2631         kvm_s390_forward_psw(vcpu, ilen);
2632         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2633 }
2634
2635 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2636 {
2637         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2638                    vcpu->arch.sie_block->icptcode);
2639         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2640
2641         if (guestdbg_enabled(vcpu))
2642                 kvm_s390_restore_guest_per_regs(vcpu);
2643
2644         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2645         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2646
2647         if (vcpu->arch.sie_block->icptcode > 0) {
2648                 int rc = kvm_handle_sie_intercept(vcpu);
2649
2650                 if (rc != -EOPNOTSUPP)
2651                         return rc;
2652                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2653                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2654                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2655                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2656                 return -EREMOTE;
2657         } else if (exit_reason != -EFAULT) {
2658                 vcpu->stat.exit_null++;
2659                 return 0;
2660         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2661                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2662                 vcpu->run->s390_ucontrol.trans_exc_code =
2663                                                 current->thread.gmap_addr;
2664                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2665                 return -EREMOTE;
2666         } else if (current->thread.gmap_pfault) {
2667                 trace_kvm_s390_major_guest_pfault(vcpu);
2668                 current->thread.gmap_pfault = 0;
2669                 if (kvm_arch_setup_async_pf(vcpu))
2670                         return 0;
2671                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2672         }
2673         return vcpu_post_run_fault_in_sie(vcpu);
2674 }
2675
2676 static int __vcpu_run(struct kvm_vcpu *vcpu)
2677 {
2678         int rc, exit_reason;
2679
2680         /*
2681          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2682          * ning the guest), so that memslots (and other stuff) are protected
2683          */
2684         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2685
2686         do {
2687                 rc = vcpu_pre_run(vcpu);
2688                 if (rc)
2689                         break;
2690
2691                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2692                 /*
2693                  * As PF_VCPU will be used in fault handler, between
2694                  * guest_enter and guest_exit should be no uaccess.
2695                  */
2696                 local_irq_disable();
2697                 guest_enter_irqoff();
2698                 __disable_cpu_timer_accounting(vcpu);
2699                 local_irq_enable();
2700                 exit_reason = sie64a(vcpu->arch.sie_block,
2701                                      vcpu->run->s.regs.gprs);
2702                 local_irq_disable();
2703                 __enable_cpu_timer_accounting(vcpu);
2704                 guest_exit_irqoff();
2705                 local_irq_enable();
2706                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2707
2708                 rc = vcpu_post_run(vcpu, exit_reason);
2709         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2710
2711         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2712         return rc;
2713 }
2714
2715 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2716 {
2717         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2718         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2719         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2720                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2721         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2722                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2723                 /* some control register changes require a tlb flush */
2724                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2725         }
2726         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2727                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2728                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2729                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2730                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2731                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2732         }
2733         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2734                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2735                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2736                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2737                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2738                         kvm_clear_async_pf_completion_queue(vcpu);
2739         }
2740         /*
2741          * If userspace sets the riccb (e.g. after migration) to a valid state,
2742          * we should enable RI here instead of doing the lazy enablement.
2743          */
2744         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2745             test_kvm_facility(vcpu->kvm, 64)) {
2746                 struct runtime_instr_cb *riccb =
2747                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2748
2749                 if (riccb->valid)
2750                         vcpu->arch.sie_block->ecb3 |= 0x01;
2751         }
2752         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
2753             test_kvm_facility(vcpu->kvm, 82)) {
2754                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2755                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
2756         }
2757
2758         kvm_run->kvm_dirty_regs = 0;
2759 }
2760
2761 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2762 {
2763         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2764         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2765         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2766         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2767         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2768         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2769         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2770         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2771         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2772         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2773         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2774         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2775         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
2776 }
2777
2778 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2779 {
2780         int rc;
2781         sigset_t sigsaved;
2782
2783         if (guestdbg_exit_pending(vcpu)) {
2784                 kvm_s390_prepare_debug_exit(vcpu);
2785                 return 0;
2786         }
2787
2788         if (vcpu->sigset_active)
2789                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2790
2791         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2792                 kvm_s390_vcpu_start(vcpu);
2793         } else if (is_vcpu_stopped(vcpu)) {
2794                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2795                                    vcpu->vcpu_id);
2796                 return -EINVAL;
2797         }
2798
2799         sync_regs(vcpu, kvm_run);
2800         enable_cpu_timer_accounting(vcpu);
2801
2802         might_fault();
2803         rc = __vcpu_run(vcpu);
2804
2805         if (signal_pending(current) && !rc) {
2806                 kvm_run->exit_reason = KVM_EXIT_INTR;
2807                 rc = -EINTR;
2808         }
2809
2810         if (guestdbg_exit_pending(vcpu) && !rc)  {
2811                 kvm_s390_prepare_debug_exit(vcpu);
2812                 rc = 0;
2813         }
2814
2815         if (rc == -EREMOTE) {
2816                 /* userspace support is needed, kvm_run has been prepared */
2817                 rc = 0;
2818         }
2819
2820         disable_cpu_timer_accounting(vcpu);
2821         store_regs(vcpu, kvm_run);
2822
2823         if (vcpu->sigset_active)
2824                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2825
2826         vcpu->stat.exit_userspace++;
2827         return rc;
2828 }
2829
2830 /*
2831  * store status at address
2832  * we use have two special cases:
2833  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2834  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2835  */
2836 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2837 {
2838         unsigned char archmode = 1;
2839         freg_t fprs[NUM_FPRS];
2840         unsigned int px;
2841         u64 clkcomp, cputm;
2842         int rc;
2843
2844         px = kvm_s390_get_prefix(vcpu);
2845         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2846                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2847                         return -EFAULT;
2848                 gpa = 0;
2849         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2850                 if (write_guest_real(vcpu, 163, &archmode, 1))
2851                         return -EFAULT;
2852                 gpa = px;
2853         } else
2854                 gpa -= __LC_FPREGS_SAVE_AREA;
2855
2856         /* manually convert vector registers if necessary */
2857         if (MACHINE_HAS_VX) {
2858                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2859                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2860                                      fprs, 128);
2861         } else {
2862                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2863                                      vcpu->run->s.regs.fprs, 128);
2864         }
2865         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2866                               vcpu->run->s.regs.gprs, 128);
2867         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2868                               &vcpu->arch.sie_block->gpsw, 16);
2869         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2870                               &px, 4);
2871         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2872                               &vcpu->run->s.regs.fpc, 4);
2873         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2874                               &vcpu->arch.sie_block->todpr, 4);
2875         cputm = kvm_s390_get_cpu_timer(vcpu);
2876         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2877                               &cputm, 8);
2878         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2879         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2880                               &clkcomp, 8);
2881         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2882                               &vcpu->run->s.regs.acrs, 64);
2883         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2884                               &vcpu->arch.sie_block->gcr, 128);
2885         return rc ? -EFAULT : 0;
2886 }
2887
2888 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2889 {
2890         /*
2891          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2892          * copying in vcpu load/put. Lets update our copies before we save
2893          * it into the save area
2894          */
2895         save_fpu_regs();
2896         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2897         save_access_regs(vcpu->run->s.regs.acrs);
2898
2899         return kvm_s390_store_status_unloaded(vcpu, addr);
2900 }
2901
2902 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2903 {
2904         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2905         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2906 }
2907
2908 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2909 {
2910         unsigned int i;
2911         struct kvm_vcpu *vcpu;
2912
2913         kvm_for_each_vcpu(i, vcpu, kvm) {
2914                 __disable_ibs_on_vcpu(vcpu);
2915         }
2916 }
2917
2918 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2919 {
2920         if (!sclp.has_ibs)
2921                 return;
2922         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2923         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2924 }
2925
2926 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2927 {
2928         int i, online_vcpus, started_vcpus = 0;
2929
2930         if (!is_vcpu_stopped(vcpu))
2931                 return;
2932
2933         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2934         /* Only one cpu at a time may enter/leave the STOPPED state. */
2935         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2936         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2937
2938         for (i = 0; i < online_vcpus; i++) {
2939                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2940                         started_vcpus++;
2941         }
2942
2943         if (started_vcpus == 0) {
2944                 /* we're the only active VCPU -> speed it up */
2945                 __enable_ibs_on_vcpu(vcpu);
2946         } else if (started_vcpus == 1) {
2947                 /*
2948                  * As we are starting a second VCPU, we have to disable
2949                  * the IBS facility on all VCPUs to remove potentially
2950                  * oustanding ENABLE requests.
2951                  */
2952                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2953         }
2954
2955         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2956         /*
2957          * Another VCPU might have used IBS while we were offline.
2958          * Let's play safe and flush the VCPU at startup.
2959          */
2960         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2961         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2962         return;
2963 }
2964
2965 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2966 {
2967         int i, online_vcpus, started_vcpus = 0;
2968         struct kvm_vcpu *started_vcpu = NULL;
2969
2970         if (is_vcpu_stopped(vcpu))
2971                 return;
2972
2973         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2974         /* Only one cpu at a time may enter/leave the STOPPED state. */
2975         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2976         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2977
2978         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2979         kvm_s390_clear_stop_irq(vcpu);
2980
2981         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2982         __disable_ibs_on_vcpu(vcpu);
2983
2984         for (i = 0; i < online_vcpus; i++) {
2985                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2986                         started_vcpus++;
2987                         started_vcpu = vcpu->kvm->vcpus[i];
2988                 }
2989         }
2990
2991         if (started_vcpus == 1) {
2992                 /*
2993                  * As we only have one VCPU left, we want to enable the
2994                  * IBS facility for that VCPU to speed it up.
2995                  */
2996                 __enable_ibs_on_vcpu(started_vcpu);
2997         }
2998
2999         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3000         return;
3001 }
3002
3003 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3004                                      struct kvm_enable_cap *cap)
3005 {
3006         int r;
3007
3008         if (cap->flags)
3009                 return -EINVAL;
3010
3011         switch (cap->cap) {
3012         case KVM_CAP_S390_CSS_SUPPORT:
3013                 if (!vcpu->kvm->arch.css_support) {
3014                         vcpu->kvm->arch.css_support = 1;
3015                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3016                         trace_kvm_s390_enable_css(vcpu->kvm);
3017                 }
3018                 r = 0;
3019                 break;
3020         default:
3021                 r = -EINVAL;
3022                 break;
3023         }
3024         return r;
3025 }
3026
3027 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3028                                   struct kvm_s390_mem_op *mop)
3029 {
3030         void __user *uaddr = (void __user *)mop->buf;
3031         void *tmpbuf = NULL;
3032         int r, srcu_idx;
3033         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3034                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3035
3036         if (mop->flags & ~supported_flags)
3037                 return -EINVAL;
3038
3039         if (mop->size > MEM_OP_MAX_SIZE)
3040                 return -E2BIG;
3041
3042         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3043                 tmpbuf = vmalloc(mop->size);
3044                 if (!tmpbuf)
3045                         return -ENOMEM;
3046         }
3047
3048         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3049
3050         switch (mop->op) {
3051         case KVM_S390_MEMOP_LOGICAL_READ:
3052                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3053                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3054                                             mop->size, GACC_FETCH);
3055                         break;
3056                 }
3057                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3058                 if (r == 0) {
3059                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3060                                 r = -EFAULT;
3061                 }
3062                 break;
3063         case KVM_S390_MEMOP_LOGICAL_WRITE:
3064                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3065                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3066                                             mop->size, GACC_STORE);
3067                         break;
3068                 }
3069                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3070                         r = -EFAULT;
3071                         break;
3072                 }
3073                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3074                 break;
3075         default:
3076                 r = -EINVAL;
3077         }
3078
3079         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3080
3081         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3082                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3083
3084         vfree(tmpbuf);
3085         return r;
3086 }
3087
3088 long kvm_arch_vcpu_ioctl(struct file *filp,
3089                          unsigned int ioctl, unsigned long arg)
3090 {
3091         struct kvm_vcpu *vcpu = filp->private_data;
3092         void __user *argp = (void __user *)arg;
3093         int idx;
3094         long r;
3095
3096         switch (ioctl) {
3097         case KVM_S390_IRQ: {
3098                 struct kvm_s390_irq s390irq;
3099
3100                 r = -EFAULT;
3101                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3102                         break;
3103                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3104                 break;
3105         }
3106         case KVM_S390_INTERRUPT: {
3107                 struct kvm_s390_interrupt s390int;
3108                 struct kvm_s390_irq s390irq = {};
3109
3110                 r = -EFAULT;
3111                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3112                         break;
3113                 if (s390int_to_s390irq(&s390int, &s390irq))
3114                         return -EINVAL;
3115                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3116                 break;
3117         }
3118         case KVM_S390_STORE_STATUS:
3119                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3120                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3121                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3122                 break;
3123         case KVM_S390_SET_INITIAL_PSW: {
3124                 psw_t psw;
3125
3126                 r = -EFAULT;
3127                 if (copy_from_user(&psw, argp, sizeof(psw)))
3128                         break;
3129                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3130                 break;
3131         }
3132         case KVM_S390_INITIAL_RESET:
3133                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3134                 break;
3135         case KVM_SET_ONE_REG:
3136         case KVM_GET_ONE_REG: {
3137                 struct kvm_one_reg reg;
3138                 r = -EFAULT;
3139                 if (copy_from_user(&reg, argp, sizeof(reg)))
3140                         break;
3141                 if (ioctl == KVM_SET_ONE_REG)
3142                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3143                 else
3144                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3145                 break;
3146         }
3147 #ifdef CONFIG_KVM_S390_UCONTROL
3148         case KVM_S390_UCAS_MAP: {
3149                 struct kvm_s390_ucas_mapping ucasmap;
3150
3151                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3152                         r = -EFAULT;
3153                         break;
3154                 }
3155
3156                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3157                         r = -EINVAL;
3158                         break;
3159                 }
3160
3161                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3162                                      ucasmap.vcpu_addr, ucasmap.length);
3163                 break;
3164         }
3165         case KVM_S390_UCAS_UNMAP: {
3166                 struct kvm_s390_ucas_mapping ucasmap;
3167
3168                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3169                         r = -EFAULT;
3170                         break;
3171                 }
3172
3173                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3174                         r = -EINVAL;
3175                         break;
3176                 }
3177
3178                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3179                         ucasmap.length);
3180                 break;
3181         }
3182 #endif
3183         case KVM_S390_VCPU_FAULT: {
3184                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3185                 break;
3186         }
3187         case KVM_ENABLE_CAP:
3188         {
3189                 struct kvm_enable_cap cap;
3190                 r = -EFAULT;
3191                 if (copy_from_user(&cap, argp, sizeof(cap)))
3192                         break;
3193                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3194                 break;
3195         }
3196         case KVM_S390_MEM_OP: {
3197                 struct kvm_s390_mem_op mem_op;
3198
3199                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3200                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3201                 else
3202                         r = -EFAULT;
3203                 break;
3204         }
3205         case KVM_S390_SET_IRQ_STATE: {
3206                 struct kvm_s390_irq_state irq_state;
3207
3208                 r = -EFAULT;
3209                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3210                         break;
3211                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3212                     irq_state.len == 0 ||
3213                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3214                         r = -EINVAL;
3215                         break;
3216                 }
3217                 r = kvm_s390_set_irq_state(vcpu,
3218                                            (void __user *) irq_state.buf,
3219                                            irq_state.len);
3220                 break;
3221         }
3222         case KVM_S390_GET_IRQ_STATE: {
3223                 struct kvm_s390_irq_state irq_state;
3224
3225                 r = -EFAULT;
3226                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3227                         break;
3228                 if (irq_state.len == 0) {
3229                         r = -EINVAL;
3230                         break;
3231                 }
3232                 r = kvm_s390_get_irq_state(vcpu,
3233                                            (__u8 __user *)  irq_state.buf,
3234                                            irq_state.len);
3235                 break;
3236         }
3237         default:
3238                 r = -ENOTTY;
3239         }
3240         return r;
3241 }
3242
3243 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3244 {
3245 #ifdef CONFIG_KVM_S390_UCONTROL
3246         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3247                  && (kvm_is_ucontrol(vcpu->kvm))) {
3248                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3249                 get_page(vmf->page);
3250                 return 0;
3251         }
3252 #endif
3253         return VM_FAULT_SIGBUS;
3254 }
3255
/*
 * Arch hook for memslot creation. Nothing is done here; it always
 * reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}
3261
3262 /* Section: memory related */
3263 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3264                                    struct kvm_memory_slot *memslot,
3265                                    const struct kvm_userspace_memory_region *mem,
3266                                    enum kvm_mr_change change)
3267 {
3268         /* A few sanity checks. We can have memory slots which have to be
3269            located/ended at a segment boundary (1MB). The memory in userland is
3270            ok to be fragmented into various different vmas. It is okay to mmap()
3271            and munmap() stuff in this slot after doing this call at any time */
3272
3273         if (mem->userspace_addr & 0xffffful)
3274                 return -EINVAL;
3275
3276         if (mem->memory_size & 0xffffful)
3277                 return -EINVAL;
3278
3279         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3280                 return -EINVAL;
3281
3282         return 0;
3283 }
3284
3285 void kvm_arch_commit_memory_region(struct kvm *kvm,
3286                                 const struct kvm_userspace_memory_region *mem,
3287                                 const struct kvm_memory_slot *old,
3288                                 const struct kvm_memory_slot *new,
3289                                 enum kvm_mr_change change)
3290 {
3291         int rc = 0;
3292
3293         switch (change) {
3294         case KVM_MR_DELETE:
3295                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3296                                         old->npages * PAGE_SIZE);
3297                 break;
3298         case KVM_MR_MOVE:
3299                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3300                                         old->npages * PAGE_SIZE);
3301                 if (rc)
3302                         break;
3303                 /* FALLTHROUGH */
3304         case KVM_MR_CREATE:
3305                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3306                                       mem->guest_phys_addr, mem->memory_size);
3307                 break;
3308         case KVM_MR_FLAGS_ONLY:
3309                 break;
3310         default:
3311                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
3312         }
3313         if (rc)
3314                 pr_warn("failed to commit memory region\n");
3315         return;
3316 }
3317
3318 static inline unsigned long nonhyp_mask(int i)
3319 {
3320         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3321
3322         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3323 }
3324
3325 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3326 {
3327         vcpu->valid_wakeup = false;
3328 }
3329
3330 static int __init kvm_s390_init(void)
3331 {
3332         int i;
3333
3334         if (!sclp.has_sief2) {
3335                 pr_info("SIE not available\n");
3336                 return -ENODEV;
3337         }
3338
3339         for (i = 0; i < 16; i++)
3340                 kvm_s390_fac_list_mask[i] |=
3341                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3342
3343         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3344 }
3345
3346 static void __exit kvm_s390_exit(void)
3347 {
3348         kvm_exit();
3349 }
3350
3351 module_init(kvm_s390_init);
3352 module_exit(kvm_s390_exit);
3353
3354 /*
3355  * Enable autoloading of the kvm module.
3356  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3357  * since x86 takes a different approach.
3358  */
3359 #include <linux/miscdevice.h>
3360 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3361 MODULE_ALIAS("devname:kvm");