
arm/arm64: KVM: introduce per-VM ops
[sagit-ice-cold/kernel_xiaomi_msm8998.git] virt/kvm/arm/vgic.c
1 /*
2  * Copyright (C) 2012 ARM Ltd.
3  * Author: Marc Zyngier <marc.zyngier@arm.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18
19 #include <linux/cpu.h>
20 #include <linux/kvm.h>
21 #include <linux/kvm_host.h>
22 #include <linux/interrupt.h>
23 #include <linux/io.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_irq.h>
27 #include <linux/uaccess.h>
28
29 #include <linux/irqchip/arm-gic.h>
30
31 #include <asm/kvm_emulate.h>
32 #include <asm/kvm_arm.h>
33 #include <asm/kvm_mmu.h>
34
35 /*
36  * How the whole thing works (courtesy of Christoffer Dall):
37  *
38  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
39  *   something is pending on the CPU interface.
40  * - Interrupts that are pending on the distributor are stored on the
41  *   vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
42  *   ioctls and guest mmio ops, and other in-kernel peripherals such as the
43  *   arch. timers).
44  * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
45  *   recalculated.
46  * - To calculate the oracle, we need info for each cpu from
47  *   compute_pending_for_cpu, which considers:
48  *   - PPI: dist->irq_pending & dist->irq_enable
49  *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
50  *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
51  *     registers, stored on each vcpu. We only keep one bit of
52  *     information per interrupt, making sure that only one vcpu can
53  *     accept the interrupt.
54  * - If any of the above state changes, we must recalculate the oracle.
55  * - The same is true when injecting an interrupt, except that we only
56  *   consider a single interrupt at a time. The irq_spi_cpu array
57  *   contains the target CPU for each SPI.
58  *
59  * The handling of level interrupts adds some extra complexity. We
60  * need to track when the interrupt has been EOIed, so we can sample
61  * the 'line' again. This is achieved as such:
62  *
63  * - When a level interrupt is moved onto a vcpu, the corresponding
64  *   bit in irq_queued is set. As long as this bit is set, the line
65  *   will be ignored for further interrupts. The interrupt is injected
66  *   into the vcpu with the GICH_LR_EOI bit set (generate a
67  *   maintenance interrupt on EOI).
68  * - When the interrupt is EOIed, the maintenance interrupt fires,
69  *   and clears the corresponding bit in irq_queued. This allows the
70  *   interrupt line to be sampled again.
71  * - Note that level-triggered interrupts can also be set to pending from
72  *   writes to GICD_ISPENDRn and lowering the external input line does not
73  *   cause the interrupt to become inactive in such a situation.
74  *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
75  *   inactive as long as the external input line is held high.
76  */
77
78 #define VGIC_ADDR_UNDEF         (-1)
79 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
80
81 #define PRODUCT_ID_KVM          0x4b    /* ASCII code K */
82 #define IMPLEMENTER_ARM         0x43b
83 #define GICC_ARCH_VERSION_V2    0x2
84
85 #define ACCESS_READ_VALUE       (1 << 0)
86 #define ACCESS_READ_RAZ         (0 << 0)
87 #define ACCESS_READ_MASK(x)     ((x) & (1 << 0))
88 #define ACCESS_WRITE_IGNORED    (0 << 1)
89 #define ACCESS_WRITE_SETBIT     (1 << 1)
90 #define ACCESS_WRITE_CLEARBIT   (2 << 1)
91 #define ACCESS_WRITE_VALUE      (3 << 1)
92 #define ACCESS_WRITE_MASK(x)    ((x) & (3 << 1))
93
94 static int vgic_init(struct kvm *kvm);
95 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
96 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
97 static void vgic_update_state(struct kvm *kvm);
98 static void vgic_kick_vcpus(struct kvm *kvm);
99 static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
100 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
101 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
102 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
103 static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
104 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
105
106 static const struct vgic_ops *vgic_ops;
107 static const struct vgic_params *vgic;
108
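/*
 * These helpers dispatch through the per-VM vm_ops, so the code below
 * stays independent of the particular GIC model emulated for this VM.
 */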
109 static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
110 {
111         vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
112 }
113
114 static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
115 {
116         return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
117 }
118
119 int kvm_vgic_map_resources(struct kvm *kvm)
120 {
121         return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
122 }
123
124 /*
125  * struct vgic_bitmap contains a bitmap made of unsigned longs, but
126  * extracts u32s out of them.
127  *
128  * This does not work on 64-bit BE systems, because the bitmap access
129  * will store two consecutive 32-bit words with the higher-addressed
130  * register's bits at the lower index and the lower-addressed register's
131  * bits at the higher index.
132  *
133  * Therefore, swizzle the register index when accessing the 32-bit word
134  * registers to access the right register's value.
135  */
136 #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
137 #define REG_OFFSET_SWIZZLE      1
138 #else
139 #define REG_OFFSET_SWIZZLE      0
140 #endif
141
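/*
 * A vgic_bitmap keeps one bit per interrupt: one unsigned long per vcpu
 * for the private (SGI/PPI) interrupts, followed by enough longs to
 * cover the shared (SPI) interrupts.
 */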
142 static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
143 {
144         int nr_longs;
145
146         nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
147
148         b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
149         if (!b->private)
150                 return -ENOMEM;
151
152         b->shared = b->private + nr_cpus;
153
154         return 0;
155 }
156
157 static void vgic_free_bitmap(struct vgic_bitmap *b)
158 {
159         kfree(b->private);
160         b->private = NULL;
161         b->shared = NULL;
162 }
163
164 /*
165  * Call this function to convert a u64 value to an unsigned long * bitmask
166  * in a way that works on both 32-bit and 64-bit LE and BE platforms.
167  *
168  * Warning: Calling this function may modify *val.
169  */
170 static unsigned long *u64_to_bitmask(u64 *val)
171 {
172 #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
173         *val = (*val >> 32) | (*val << 32);
174 #endif
175         return (unsigned long *)val;
176 }
177
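/*
 * Return the u32 word backing a 32-bit distributor register: offset 0
 * selects the per-cpu (SGI/PPI) word of @cpuid, larger offsets index
 * the shared (SPI) words, swizzled on 64-bit big-endian hosts.
 */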
178 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
179                                 int cpuid, u32 offset)
180 {
181         offset >>= 2;
182         if (!offset)
183                 return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
184         else
185                 return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
186 }
187
188 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
189                                    int cpuid, int irq)
190 {
191         if (irq < VGIC_NR_PRIVATE_IRQS)
192                 return test_bit(irq, x->private + cpuid);
193
194         return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
195 }
196
197 static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
198                                     int irq, int val)
199 {
200         unsigned long *reg;
201
202         if (irq < VGIC_NR_PRIVATE_IRQS) {
203                 reg = x->private + cpuid;
204         } else {
205                 reg = x->shared;
206                 irq -= VGIC_NR_PRIVATE_IRQS;
207         }
208
209         if (val)
210                 set_bit(irq, reg);
211         else
212                 clear_bit(irq, reg);
213 }
214
215 static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
216 {
217         return x->private + cpuid;
218 }
219
220 static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
221 {
222         return x->shared;
223 }
224
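/*
 * A vgic_bytemap keeps one byte per interrupt (currently only used for
 * the 8-bit priority registers): VGIC_NR_PRIVATE_IRQS bytes per vcpu,
 * followed by one byte per shared interrupt.
 */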
225 static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
226 {
227         int size;
228
229         size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
230         size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
231
232         x->private = kzalloc(size, GFP_KERNEL);
233         if (!x->private)
234                 return -ENOMEM;
235
236         x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
237         return 0;
238 }
239
240 static void vgic_free_bytemap(struct vgic_bytemap *b)
241 {
242         kfree(b->private);
243         b->private = NULL;
244         b->shared = NULL;
245 }
246
247 static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
248 {
249         u32 *reg;
250
251         if (offset < VGIC_NR_PRIVATE_IRQS) {
252                 reg = x->private;
253                 offset += cpuid * VGIC_NR_PRIVATE_IRQS;
254         } else {
255                 reg = x->shared;
256                 offset -= VGIC_NR_PRIVATE_IRQS;
257         }
258
259         return reg + (offset / sizeof(u32));
260 }
261
262 #define VGIC_CFG_LEVEL  0
263 #define VGIC_CFG_EDGE   1
264
265 static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
266 {
267         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
268         int irq_val;
269
270         irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
271         return irq_val == VGIC_CFG_EDGE;
272 }
273
274 static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
275 {
276         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
277
278         return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
279 }
280
281 static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
282 {
283         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
284
285         return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
286 }
287
288 static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
289 {
290         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
291
292         vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
293 }
294
295 static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
296 {
297         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
298
299         vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
300 }
301
302 static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
303 {
304         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
305
306         return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
307 }
308
309 static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
310 {
311         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
312
313         vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
314 }
315
316 static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
317 {
318         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
319
320         vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
321 }
322
323 static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
324 {
325         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
326
327         return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
328 }
329
330 static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
331 {
332         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
333
334         vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
335 }
336
337 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
338 {
339         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
340
341         return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
342 }
343
344 static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
345 {
346         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
347
348         vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
349 }
350
351 static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
352 {
353         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
354
355         vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
356 }
357
358 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
359 {
360         if (irq < VGIC_NR_PRIVATE_IRQS)
361                 set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
362         else
363                 set_bit(irq - VGIC_NR_PRIVATE_IRQS,
364                         vcpu->arch.vgic_cpu.pending_shared);
365 }
366
367 static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
368 {
369         if (irq < VGIC_NR_PRIVATE_IRQS)
370                 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
371         else
372                 clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
373                           vcpu->arch.vgic_cpu.pending_shared);
374 }
375
376 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
377 {
378         return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
379 }
380
381 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
382 {
383         return le32_to_cpu(*((u32 *)mmio->data)) & mask;
384 }
385
386 static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
387 {
388         *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
389 }
390
391 /**
392  * vgic_reg_access - access vgic register
393  * @mmio:   pointer to the data describing the mmio access
394  * @reg:    pointer to the virtual backing of vgic distributor data
395  * @offset: least significant 2 bits used for word offset
396  * @mode:   ACCESS_ mode (see defines above)
397  *
398  * Helper to make vgic register access easier using one of the access
399  * modes defined for vgic register access
400  * (read,raz,write-ignored,setbit,clearbit,write)
401  */
402 static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
403                             phys_addr_t offset, int mode)
404 {
405         int word_offset = (offset & 3) * 8;
406         u32 mask = (1UL << (mmio->len * 8)) - 1;
407         u32 regval;
408
409         /*
410          * Any alignment fault should have been delivered to the guest
411          * directly (ARM ARM B3.12.7 "Prioritization of aborts").
412          */
413
414         if (reg) {
415                 regval = *reg;
416         } else {
417                 BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
418                 regval = 0;
419         }
420
421         if (mmio->is_write) {
422                 u32 data = mmio_data_read(mmio, mask) << word_offset;
423                 switch (ACCESS_WRITE_MASK(mode)) {
424                 case ACCESS_WRITE_IGNORED:
425                         return;
426
427                 case ACCESS_WRITE_SETBIT:
428                         regval |= data;
429                         break;
430
431                 case ACCESS_WRITE_CLEARBIT:
432                         regval &= ~data;
433                         break;
434
435                 case ACCESS_WRITE_VALUE:
436                         regval = (regval & ~(mask << word_offset)) | data;
437                         break;
438                 }
439                 *reg = regval;
440         } else {
441                 switch (ACCESS_READ_MASK(mode)) {
442                 case ACCESS_READ_RAZ:
443                         regval = 0;
444                         /* fall through */
445
446                 case ACCESS_READ_VALUE:
447                         mmio_data_write(mmio, mask, regval >> word_offset);
448                 }
449         }
450 }
451
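/*
 * Handles GICD_CTLR, GICD_TYPER and GICD_IIDR. GICD_TYPER advertises the
 * number of vcpus (CPUNumber = nr_vcpus - 1) and the number of
 * implemented interrupts (ITLinesNumber = nr_irqs / 32 - 1).
 */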
452 static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
453                              struct kvm_exit_mmio *mmio, phys_addr_t offset)
454 {
455         u32 reg;
456         u32 word_offset = offset & 3;
457
458         switch (offset & ~3) {
459         case 0:                 /* GICD_CTLR */
460                 reg = vcpu->kvm->arch.vgic.enabled;
461                 vgic_reg_access(mmio, &reg, word_offset,
462                                 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
463                 if (mmio->is_write) {
464                         vcpu->kvm->arch.vgic.enabled = reg & 1;
465                         vgic_update_state(vcpu->kvm);
466                         return true;
467                 }
468                 break;
469
470         case 4:                 /* GICD_TYPER */
471                 reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
472                 reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
473                 vgic_reg_access(mmio, &reg, word_offset,
474                                 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
475                 break;
476
477         case 8:                 /* GICD_IIDR */
478                 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
479                 vgic_reg_access(mmio, &reg, word_offset,
480                                 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
481                 break;
482         }
483
484         return false;
485 }
486
487 static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
488                                struct kvm_exit_mmio *mmio, phys_addr_t offset)
489 {
490         vgic_reg_access(mmio, NULL, offset,
491                         ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
492         return false;
493 }
494
495 static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
496                                        struct kvm_exit_mmio *mmio,
497                                        phys_addr_t offset)
498 {
499         u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
500                                        vcpu->vcpu_id, offset);
501         vgic_reg_access(mmio, reg, offset,
502                         ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
503         if (mmio->is_write) {
504                 vgic_update_state(vcpu->kvm);
505                 return true;
506         }
507
508         return false;
509 }
510
511 static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
512                                          struct kvm_exit_mmio *mmio,
513                                          phys_addr_t offset)
514 {
515         u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
516                                        vcpu->vcpu_id, offset);
517         vgic_reg_access(mmio, reg, offset,
518                         ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
519         if (mmio->is_write) {
520                 if (offset < 4) /* Force SGI enabled */
521                         *reg |= 0xffff;
522                 vgic_retire_disabled_irqs(vcpu);
523                 vgic_update_state(vcpu->kvm);
524                 return true;
525         }
526
527         return false;
528 }
529
530 static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
531                                         struct kvm_exit_mmio *mmio,
532                                         phys_addr_t offset)
533 {
534         u32 *reg, orig;
535         u32 level_mask;
536         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
537
538         reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
539         level_mask = (~(*reg));
540
541         /* Mark both level and edge triggered irqs as pending */
542         reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
543         orig = *reg;
544         vgic_reg_access(mmio, reg, offset,
545                         ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
546
547         if (mmio->is_write) {
548                 /* Set the soft-pending flag only for level-triggered irqs */
549                 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
550                                           vcpu->vcpu_id, offset);
551                 vgic_reg_access(mmio, reg, offset,
552                                 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
553                 *reg &= level_mask;
554
555                 /* Ignore writes to SGIs */
556                 if (offset < 2) {
557                         *reg &= ~0xffff;
558                         *reg |= orig & 0xffff;
559                 }
560
561                 vgic_update_state(vcpu->kvm);
562                 return true;
563         }
564
565         return false;
566 }
567
568 static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
569                                           struct kvm_exit_mmio *mmio,
570                                           phys_addr_t offset)
571 {
572         u32 *level_active;
573         u32 *reg, orig;
574         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
575
576         reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
577         orig = *reg;
578         vgic_reg_access(mmio, reg, offset,
579                         ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
580         if (mmio->is_write) {
581                 /* Re-set level triggered level-active interrupts */
582                 level_active = vgic_bitmap_get_reg(&dist->irq_level,
583                                           vcpu->vcpu_id, offset);
584                 reg = vgic_bitmap_get_reg(&dist->irq_pending,
585                                           vcpu->vcpu_id, offset);
586                 *reg |= *level_active;
587
588                 /* Ignore writes to SGIs */
589                 if (offset < 2) {
590                         *reg &= ~0xffff;
591                         *reg |= orig & 0xffff;
592                 }
593
594                 /* Clear soft-pending flags */
595                 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
596                                           vcpu->vcpu_id, offset);
597                 vgic_reg_access(mmio, reg, offset,
598                                 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
599
600                 vgic_update_state(vcpu->kvm);
601                 return true;
602         }
603
604         return false;
605 }
606
607 static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
608                                      struct kvm_exit_mmio *mmio,
609                                      phys_addr_t offset)
610 {
611         u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
612                                         vcpu->vcpu_id, offset);
613         vgic_reg_access(mmio, reg, offset,
614                         ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
615         return false;
616 }
617
618 #define GICD_ITARGETSR_SIZE     32
619 #define GICD_CPUTARGETS_BITS    8
620 #define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
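/*
 * Reconstruct a GICD_ITARGETSRn value from irq_spi_cpu: each of the four
 * bytes gets a single bit set, the one of the SPI's target vcpu.
 */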
621 static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
622 {
623         struct vgic_dist *dist = &kvm->arch.vgic;
624         int i;
625         u32 val = 0;
626
627         irq -= VGIC_NR_PRIVATE_IRQS;
628
629         for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
630                 val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
631
632         return val;
633 }
634
635 static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
636 {
637         struct vgic_dist *dist = &kvm->arch.vgic;
638         struct kvm_vcpu *vcpu;
639         int i, c;
640         unsigned long *bmap;
641         u32 target;
642
643         irq -= VGIC_NR_PRIVATE_IRQS;
644
645         /*
646          * Pick the LSB in each byte. This ensures we target exactly
647          * one vcpu per IRQ. If the byte is null, assume we target
648          * CPU0.
649          */
650         for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
651                 int shift = i * GICD_CPUTARGETS_BITS;
652                 target = ffs((val >> shift) & 0xffU);
653                 target = target ? (target - 1) : 0;
654                 dist->irq_spi_cpu[irq + i] = target;
655                 kvm_for_each_vcpu(c, vcpu, kvm) {
656                         bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
657                         if (c == target)
658                                 set_bit(irq + i, bmap);
659                         else
660                                 clear_bit(irq + i, bmap);
661                 }
662         }
663 }
664
665 static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
666                                    struct kvm_exit_mmio *mmio,
667                                    phys_addr_t offset)
668 {
669         u32 reg;
670
671         /* We treat the banked interrupt targets as read-only */
672         if (offset < 32) {
673                 u32 roreg = 1 << vcpu->vcpu_id;
674                 roreg |= roreg << 8;
675                 roreg |= roreg << 16;
676
677                 vgic_reg_access(mmio, &roreg, offset,
678                                 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
679                 return false;
680         }
681
682         reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
683         vgic_reg_access(mmio, &reg, offset,
684                         ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
685         if (mmio->is_write) {
686                 vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
687                 vgic_update_state(vcpu->kvm);
688                 return true;
689         }
690
691         return false;
692 }
693
694 static u32 vgic_cfg_expand(u16 val)
695 {
696         u32 res = 0;
697         int i;
698
699         /*
700          * Turn a 16bit value like abcd...mnop into a 32bit word
701          * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
702          */
703         for (i = 0; i < 16; i++)
704                 res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
705
706         return res;
707 }
708
709 static u16 vgic_cfg_compress(u32 val)
710 {
711         u16 res = 0;
712         int i;
713
714         /*
715          * Turn a 32bit word a0b0c0d0...m0n0o0p0 into a 16bit value like
716          * abcd...mnop, which is what we really care about.
717          */
718         for (i = 0; i < 16; i++)
719                 res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
720
721         return res;
722 }
723
724 /*
725  * The distributor uses 2 bits per IRQ for the CFG register, but the
726  * LSB is always 0. As such, we only keep the upper bit, and use the
727  * two functions above to compress/expand the bits.
728  */
729 static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
730                                 struct kvm_exit_mmio *mmio, phys_addr_t offset)
731 {
732         u32 val;
733         u32 *reg;
734
735         reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
736                                   vcpu->vcpu_id, offset >> 1);
737
738         if (offset & 4)
739                 val = *reg >> 16;
740         else
741                 val = *reg & 0xffff;
742
743         val = vgic_cfg_expand(val);
744         vgic_reg_access(mmio, &val, offset,
745                         ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
746         if (mmio->is_write) {
747                 if (offset < 8) {
748                         *reg = ~0U; /* Force PPIs/SGIs to 1 */
749                         return false;
750                 }
751
752                 val = vgic_cfg_compress(val);
753                 if (offset & 4) {
754                         *reg &= 0xffff;
755                         *reg |= val << 16;
756                 } else {
757                         *reg &= 0xffff << 16;
758                         *reg |= val;
759                 }
760         }
761
762         return false;
763 }
764
765 static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
766                                 struct kvm_exit_mmio *mmio, phys_addr_t offset)
767 {
768         u32 reg;
769         vgic_reg_access(mmio, &reg, offset,
770                         ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
771         if (mmio->is_write) {
772                 vgic_dispatch_sgi(vcpu, reg);
773                 vgic_update_state(vcpu->kvm);
774                 return true;
775         }
776
777         return false;
778 }
779
780 static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
781 {
782         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
783
784         *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source;
785 }
786
787 /**
788  * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
789  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
790  *
791  * Move any pending IRQs that have already been assigned to LRs back to the
792  * emulated distributor state so that the complete emulated state can be read
793  * from the main emulation structures without investigating the LRs.
794  *
795  * Note that IRQs in the active state in the LRs get their pending state moved
796  * to the distributor but the active state stays in the LRs, because we don't
797  * track the active state on the distributor side.
798  */
799 static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
800 {
801         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
802         int i;
803
804         for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
805                 struct vgic_lr lr = vgic_get_lr(vcpu, i);
806
807                 /*
808                  * There are three options for the state bits:
809                  *
810                  * 01: pending
811                  * 10: active
812                  * 11: pending and active
813                  *
814                  * If the LR holds only an active interrupt (not pending) then
815                  * just leave it alone.
816                  */
817                 if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
818                         continue;
819
820                 /*
821                  * Reestablish the pending state on the distributor and the
822                  * CPU interface.  It may have already been pending, but that
823                  * is fine, then we are only setting a few bits that were
824                  * already set.
825                  */
826                 vgic_dist_irq_set_pending(vcpu, lr.irq);
827                 if (lr.irq < VGIC_NR_SGIS)
828                         add_sgi_source(vcpu, lr.irq, lr.source);
829                 lr.state &= ~LR_STATE_PENDING;
830                 vgic_set_lr(vcpu, i, lr);
831
832                 /*
833                  * If there's no state left on the LR (it could still be
834                  * active), then the LR does not hold any useful info and can
835                  * be marked as free for other use.
836                  */
837                 if (!(lr.state & LR_STATE_MASK)) {
838                         vgic_retire_lr(i, lr.irq, vcpu);
839                         vgic_irq_clear_queued(vcpu, lr.irq);
840                 }
841
842                 /* Finally update the VGIC state. */
843                 vgic_update_state(vcpu->kvm);
844         }
845 }
846
847 /* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
848 static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
849                                         struct kvm_exit_mmio *mmio,
850                                         phys_addr_t offset)
851 {
852         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
853         int sgi;
854         int min_sgi = (offset & ~0x3);
855         int max_sgi = min_sgi + 3;
856         int vcpu_id = vcpu->vcpu_id;
857         u32 reg = 0;
858
859         /* Copy source SGIs from distributor side */
860         for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
861                 int shift = 8 * (sgi - min_sgi);
862                 reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
863         }
864
865         mmio_data_write(mmio, ~0, reg);
866         return false;
867 }
868
869 static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
870                                          struct kvm_exit_mmio *mmio,
871                                          phys_addr_t offset, bool set)
872 {
873         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
874         int sgi;
875         int min_sgi = (offset & ~0x3);
876         int max_sgi = min_sgi + 3;
877         int vcpu_id = vcpu->vcpu_id;
878         u32 reg;
879         bool updated = false;
880
881         reg = mmio_data_read(mmio, ~0);
882
883         /* Set or clear pending SGI sources on the distributor */
884         for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
885                 u8 mask = reg >> (8 * (sgi - min_sgi));
886                 u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
887                 if (set) {
888                         if ((*src & mask) != mask)
889                                 updated = true;
890                         *src |= mask;
891                 } else {
892                         if (*src & mask)
893                                 updated = true;
894                         *src &= ~mask;
895                 }
896         }
897
898         if (updated)
899                 vgic_update_state(vcpu->kvm);
900
901         return updated;
902 }
903
904 static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
905                                 struct kvm_exit_mmio *mmio,
906                                 phys_addr_t offset)
907 {
908         if (!mmio->is_write)
909                 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
910         else
911                 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
912 }
913
914 static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
915                                   struct kvm_exit_mmio *mmio,
916                                   phys_addr_t offset)
917 {
918         if (!mmio->is_write)
919                 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
920         else
921                 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
922 }
923
924 /*
925  * I would have liked to use the kvm_bus_io_*() API instead, but it
926  * cannot cope with banked registers (only the VM pointer is passed
927  * around, and we need the vcpu). One of these days, someone please
928  * fix it!
929  */
930 struct mmio_range {
931         phys_addr_t base;
932         unsigned long len;
933         int bits_per_irq;
934         bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
935                             phys_addr_t offset);
936 };
937
938 static const struct mmio_range vgic_dist_ranges[] = {
939         {
940                 .base           = GIC_DIST_CTRL,
941                 .len            = 12,
942                 .bits_per_irq   = 0,
943                 .handle_mmio    = handle_mmio_misc,
944         },
945         {
946                 .base           = GIC_DIST_IGROUP,
947                 .len            = VGIC_MAX_IRQS / 8,
948                 .bits_per_irq   = 1,
949                 .handle_mmio    = handle_mmio_raz_wi,
950         },
951         {
952                 .base           = GIC_DIST_ENABLE_SET,
953                 .len            = VGIC_MAX_IRQS / 8,
954                 .bits_per_irq   = 1,
955                 .handle_mmio    = handle_mmio_set_enable_reg,
956         },
957         {
958                 .base           = GIC_DIST_ENABLE_CLEAR,
959                 .len            = VGIC_MAX_IRQS / 8,
960                 .bits_per_irq   = 1,
961                 .handle_mmio    = handle_mmio_clear_enable_reg,
962         },
963         {
964                 .base           = GIC_DIST_PENDING_SET,
965                 .len            = VGIC_MAX_IRQS / 8,
966                 .bits_per_irq   = 1,
967                 .handle_mmio    = handle_mmio_set_pending_reg,
968         },
969         {
970                 .base           = GIC_DIST_PENDING_CLEAR,
971                 .len            = VGIC_MAX_IRQS / 8,
972                 .bits_per_irq   = 1,
973                 .handle_mmio    = handle_mmio_clear_pending_reg,
974         },
975         {
976                 .base           = GIC_DIST_ACTIVE_SET,
977                 .len            = VGIC_MAX_IRQS / 8,
978                 .bits_per_irq   = 1,
979                 .handle_mmio    = handle_mmio_raz_wi,
980         },
981         {
982                 .base           = GIC_DIST_ACTIVE_CLEAR,
983                 .len            = VGIC_MAX_IRQS / 8,
984                 .bits_per_irq   = 1,
985                 .handle_mmio    = handle_mmio_raz_wi,
986         },
987         {
988                 .base           = GIC_DIST_PRI,
989                 .len            = VGIC_MAX_IRQS,
990                 .bits_per_irq   = 8,
991                 .handle_mmio    = handle_mmio_priority_reg,
992         },
993         {
994                 .base           = GIC_DIST_TARGET,
995                 .len            = VGIC_MAX_IRQS,
996                 .bits_per_irq   = 8,
997                 .handle_mmio    = handle_mmio_target_reg,
998         },
999         {
1000                 .base           = GIC_DIST_CONFIG,
1001                 .len            = VGIC_MAX_IRQS / 4,
1002                 .bits_per_irq   = 2,
1003                 .handle_mmio    = handle_mmio_cfg_reg,
1004         },
1005         {
1006                 .base           = GIC_DIST_SOFTINT,
1007                 .len            = 4,
1008                 .handle_mmio    = handle_mmio_sgi_reg,
1009         },
1010         {
1011                 .base           = GIC_DIST_SGI_PENDING_CLEAR,
1012                 .len            = VGIC_NR_SGIS,
1013                 .handle_mmio    = handle_mmio_sgi_clear,
1014         },
1015         {
1016                 .base           = GIC_DIST_SGI_PENDING_SET,
1017                 .len            = VGIC_NR_SGIS,
1018                 .handle_mmio    = handle_mmio_sgi_set,
1019         },
1020         {}
1021 };
1022
1023 static const
1024 struct mmio_range *find_matching_range(const struct mmio_range *ranges,
1025                                        struct kvm_exit_mmio *mmio,
1026                                        phys_addr_t offset)
1027 {
1028         const struct mmio_range *r = ranges;
1029
1030         while (r->len) {
1031                 if (offset >= r->base &&
1032                     (offset + mmio->len) <= (r->base + r->len))
1033                         return r;
1034                 r++;
1035         }
1036
1037         return NULL;
1038 }
1039
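/*
 * A per-IRQ register access is only valid if the addressed IRQ actually
 * exists on this VM; accesses that are not per-IRQ are always allowed.
 */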
1040 static bool vgic_validate_access(const struct vgic_dist *dist,
1041                                  const struct mmio_range *range,
1042                                  unsigned long offset)
1043 {
1044         int irq;
1045
1046         if (!range->bits_per_irq)
1047                 return true;    /* Not an irq-based access */
1048
1049         irq = offset * 8 / range->bits_per_irq;
1050         if (irq >= dist->nr_irqs)
1051                 return false;
1052
1053         return true;
1054 }
1055
1056 /*
1057  * Call the respective handler function for the given range.
1058  * We split up any 64 bit accesses into two consecutive 32 bit
1059  * handler calls and merge the result afterwards.
1060  * We do this in a little endian fashion regardless of the host's
1061  * or guest's endianness, because the GIC is always LE and the rest of
1062  * the code (vgic_reg_access) also puts it in a LE fashion already.
1063  * At this point we have already identified the handle function, so
1064  * range points to that one entry and offset is relative to this.
1065  */
1066 static bool call_range_handler(struct kvm_vcpu *vcpu,
1067                                struct kvm_exit_mmio *mmio,
1068                                unsigned long offset,
1069                                const struct mmio_range *range)
1070 {
1071         u32 *data32 = (void *)mmio->data;
1072         struct kvm_exit_mmio mmio32;
1073         bool ret;
1074
1075         if (likely(mmio->len <= 4))
1076                 return range->handle_mmio(vcpu, mmio, offset);
1077
1078         /*
1079          * Any access bigger than 4 bytes (that we currently handle in KVM)
1080          * is actually 8 bytes long, caused by a 64-bit access.
1081          */
1082
1083         mmio32.len = 4;
1084         mmio32.is_write = mmio->is_write;
1085
1086         mmio32.phys_addr = mmio->phys_addr + 4;
1087         if (mmio->is_write)
1088                 *(u32 *)mmio32.data = data32[1];
1089         ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
1090         if (!mmio->is_write)
1091                 data32[1] = *(u32 *)mmio32.data;
1092
1093         mmio32.phys_addr = mmio->phys_addr;
1094         if (mmio->is_write)
1095                 *(u32 *)mmio32.data = data32[0];
1096         ret |= range->handle_mmio(vcpu, &mmio32, offset);
1097         if (!mmio->is_write)
1098                 data32[0] = *(u32 *)mmio32.data;
1099
1100         return ret;
1101 }
1102
1103 /**
1104  * vgic_handle_mmio_range - handle an in-kernel MMIO access
1105  * @vcpu:       pointer to the vcpu performing the access
1106  * @run:        pointer to the kvm_run structure
1107  * @mmio:       pointer to the data describing the access
1108  * @ranges:     array of MMIO ranges in a given region
1109  * @mmio_base:  base address of that region
1110  *
1111  * returns true if the MMIO access could be performed
1112  */
1113 static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
1114                             struct kvm_exit_mmio *mmio,
1115                             const struct mmio_range *ranges,
1116                             unsigned long mmio_base)
1117 {
1118         const struct mmio_range *range;
1119         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1120         bool updated_state;
1121         unsigned long offset;
1122
1123         offset = mmio->phys_addr - mmio_base;
1124         range = find_matching_range(ranges, mmio, offset);
1125         if (unlikely(!range || !range->handle_mmio)) {
1126                 pr_warn("Unhandled access %d %08llx %d\n",
1127                         mmio->is_write, mmio->phys_addr, mmio->len);
1128                 return false;
1129         }
1130
1131         spin_lock(&vcpu->kvm->arch.vgic.lock);
1132         offset -= range->base;
1133         if (vgic_validate_access(dist, range, offset)) {
1134                 updated_state = call_range_handler(vcpu, mmio, offset, range);
1135         } else {
1136                 if (!mmio->is_write)
1137                         memset(mmio->data, 0, mmio->len);
1138                 updated_state = false;
1139         }
1140         spin_unlock(&vcpu->kvm->arch.vgic.lock);
1141         kvm_prepare_mmio(run, mmio);
1142         kvm_handle_mmio_return(vcpu, run);
1143
1144         if (updated_state)
1145                 vgic_kick_vcpus(vcpu->kvm);
1146
1147         return true;
1148 }
1149
1150 static inline bool is_in_range(phys_addr_t addr, unsigned long len,
1151                                phys_addr_t baseaddr, unsigned long size)
1152 {
1153         return (addr >= baseaddr) && (addr + len <= baseaddr + size);
1154 }
1155
1156 static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
1157                                 struct kvm_exit_mmio *mmio)
1158 {
1159         unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
1160
1161         if (!is_in_range(mmio->phys_addr, mmio->len, base,
1162                          KVM_VGIC_V2_DIST_SIZE))
1163                 return false;
1164
1165         /* GICv2 does not support accesses wider than 32 bits */
1166         if (mmio->len > 4) {
1167                 kvm_inject_dabt(vcpu, mmio->phys_addr);
1168                 return true;
1169         }
1170
1171         return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
1172 }
1173
1174 /**
1175  * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
1176  * @vcpu:      pointer to the vcpu performing the access
1177  * @run:       pointer to the kvm_run structure
1178  * @mmio:      pointer to the data describing the access
1179  *
1180  * returns true if the MMIO access has been performed in kernel space,
1181  * and false if it needs to be emulated in user space.
1182  * Calls the actual handling routine for the selected VGIC model.
1183  */
1184 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
1185                       struct kvm_exit_mmio *mmio)
1186 {
1187         if (!irqchip_in_kernel(vcpu->kvm))
1188                 return false;
1189
1190         /*
1191          * This will currently call either vgic_v2_handle_mmio() or
1192          * vgic_v3_handle_mmio(), which in turn will call
1193          * vgic_handle_mmio_range() defined above.
1194          */
1195         return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
1196 }
1197
1198 static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
1199 {
1200         return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
1201 }
1202
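/*
 * Emulate a write to GICD_SGIR: bits [3:0] give the SGI number, bits
 * [23:16] the CPU target list and bits [25:24] the target list filter
 * (0: use the list, 1: all but self, 2: self only).
 */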
1203 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
1204 {
1205         struct kvm *kvm = vcpu->kvm;
1206         struct vgic_dist *dist = &kvm->arch.vgic;
1207         int nrcpus = atomic_read(&kvm->online_vcpus);
1208         u8 target_cpus;
1209         int sgi, mode, c, vcpu_id;
1210
1211         vcpu_id = vcpu->vcpu_id;
1212
1213         sgi = reg & 0xf;
1214         target_cpus = (reg >> 16) & 0xff;
1215         mode = (reg >> 24) & 3;
1216
1217         switch (mode) {
1218         case 0:
1219                 if (!target_cpus)
1220                         return;
1221                 break;
1222
1223         case 1:
1224                 target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
1225                 break;
1226
1227         case 2:
1228                 target_cpus = 1 << vcpu_id;
1229                 break;
1230         }
1231
1232         kvm_for_each_vcpu(c, vcpu, kvm) {
1233                 if (target_cpus & 1) {
1234                         /* Flag the SGI as pending */
1235                         vgic_dist_irq_set_pending(vcpu, sgi);
1236                         *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
1237                         kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
1238                 }
1239
1240                 target_cpus >>= 1;
1241         }
1242 }
1243
1244 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
1245 {
1246         return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
1247 }
1248
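/*
 * Compute this vcpu's contribution to the pending "oracle" described at
 * the top of this file: private IRQs must be pending and enabled, shared
 * IRQs must additionally target this vcpu. Returns true if anything is
 * left pending for this vcpu.
 */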
1249 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
1250 {
1251         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1252         unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
1253         unsigned long pending_private, pending_shared;
1254         int nr_shared = vgic_nr_shared_irqs(dist);
1255         int vcpu_id;
1256
1257         vcpu_id = vcpu->vcpu_id;
1258         pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
1259         pend_shared = vcpu->arch.vgic_cpu.pending_shared;
1260
1261         pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
1262         enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
1263         bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
1264
1265         pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
1266         enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
1267         bitmap_and(pend_shared, pending, enabled, nr_shared);
1268         bitmap_and(pend_shared, pend_shared,
1269                    vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
1270                    nr_shared);
1271
1272         pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
1273         pending_shared = find_first_bit(pend_shared, nr_shared);
1274         return (pending_private < VGIC_NR_PRIVATE_IRQS ||
1275                 pending_shared < vgic_nr_shared_irqs(dist));
1276 }
1277
1278 /*
1279  * Update the interrupt state and determine which CPUs have pending
1280  * interrupts. Must be called with distributor lock held.
1281  */
1282 static void vgic_update_state(struct kvm *kvm)
1283 {
1284         struct vgic_dist *dist = &kvm->arch.vgic;
1285         struct kvm_vcpu *vcpu;
1286         int c;
1287
1288         if (!dist->enabled) {
1289                 set_bit(0, dist->irq_pending_on_cpu);
1290                 return;
1291         }
1292
1293         kvm_for_each_vcpu(c, vcpu, kvm) {
1294                 if (compute_pending_for_cpu(vcpu)) {
1295                         pr_debug("CPU%d has pending interrupts\n", c);
1296                         set_bit(c, dist->irq_pending_on_cpu);
1297                 }
1298         }
1299 }
1300
1301 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
1302 {
1303         return vgic_ops->get_lr(vcpu, lr);
1304 }
1305
1306 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
1307                                struct vgic_lr vlr)
1308 {
1309         vgic_ops->set_lr(vcpu, lr, vlr);
1310 }
1311
1312 static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
1313                                struct vgic_lr vlr)
1314 {
1315         vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
1316 }
1317
1318 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
1319 {
1320         return vgic_ops->get_elrsr(vcpu);
1321 }
1322
1323 static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
1324 {
1325         return vgic_ops->get_eisr(vcpu);
1326 }
1327
1328 static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
1329 {
1330         return vgic_ops->get_interrupt_status(vcpu);
1331 }
1332
1333 static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
1334 {
1335         vgic_ops->enable_underflow(vcpu);
1336 }
1337
1338 static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
1339 {
1340         vgic_ops->disable_underflow(vcpu);
1341 }
1342
1343 static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1344 {
1345         vgic_ops->get_vmcr(vcpu, vmcr);
1346 }
1347
1348 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1349 {
1350         vgic_ops->set_vmcr(vcpu, vmcr);
1351 }
1352
1353 static inline void vgic_enable(struct kvm_vcpu *vcpu)
1354 {
1355         vgic_ops->enable(vcpu);
1356 }
1357
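/*
 * Free a list register: clear its state, mark it unused and forget the
 * IRQ -> LR mapping so the interrupt can be queued again later.
 */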
1358 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
1359 {
1360         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1361         struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
1362
1363         vlr.state = 0;
1364         vgic_set_lr(vcpu, lr_nr, vlr);
1365         clear_bit(lr_nr, vgic_cpu->lr_used);
1366         vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1367 }
1368
1369 /*
1370  * An interrupt may have been disabled after being made pending on the
1371  * CPU interface (the classic case is a timer running while we're
1372  * rebooting the guest - the interrupt would kick as soon as the CPU
1373  * interface gets enabled, with deadly consequences).
1374  *
1375  * The solution is to examine already active LRs, and check the
1376  * interrupt is still enabled. If not, just retire it.
1377  */
1378 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1379 {
1380         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1381         int lr;
1382
1383         for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
1384                 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1385
1386                 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
1387                         vgic_retire_lr(lr, vlr.irq, vcpu);
1388                         if (vgic_irq_is_queued(vcpu, vlr.irq))
1389                                 vgic_irq_clear_queued(vcpu, vlr.irq);
1390                 }
1391         }
1392 }
1393
1394 /*
1395  * Queue an interrupt to a CPU virtual interface. Return true on success,
1396  * or false if it wasn't possible to queue it.
1397  */
1398 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1399 {
1400         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1401         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1402         struct vgic_lr vlr;
1403         int lr;
1404
1405         /* Sanitize the input... */
1406         BUG_ON(sgi_source_id & ~7);
1407         BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1408         BUG_ON(irq >= dist->nr_irqs);
1409
1410         kvm_debug("Queue IRQ%d\n", irq);
1411
1412         lr = vgic_cpu->vgic_irq_lr_map[irq];
1413
1414         /* Do we have an active interrupt for the same CPUID? */
1415         if (lr != LR_EMPTY) {
1416                 vlr = vgic_get_lr(vcpu, lr);
1417                 if (vlr.source == sgi_source_id) {
1418                         kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
1419                         BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
1420                         vlr.state |= LR_STATE_PENDING;
1421                         vgic_set_lr(vcpu, lr, vlr);
1422                         return true;
1423                 }
1424         }
1425
1426         /* Try to use another LR for this interrupt */
1427         lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
1428                                vgic->nr_lr);
1429         if (lr >= vgic->nr_lr)
1430                 return false;
1431
1432         kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1433         vgic_cpu->vgic_irq_lr_map[irq] = lr;
1434         set_bit(lr, vgic_cpu->lr_used);
1435
1436         vlr.irq = irq;
1437         vlr.source = sgi_source_id;
1438         vlr.state = LR_STATE_PENDING;
1439         if (!vgic_irq_is_edge(vcpu, irq))
1440                 vlr.state |= LR_EOI_INT;
1441
1442         vgic_set_lr(vcpu, lr, vlr);
1443
1444         return true;
1445 }
1446
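/*
 * Queue an SGI once for every source vcpu recorded in irq_sgi_sources,
 * and report success only when all sources could be put into an LR.
 */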
1447 static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1448 {
1449         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1450         unsigned long sources;
1451         int vcpu_id = vcpu->vcpu_id;
1452         int c;
1453
1454         sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
1455
1456         for_each_set_bit(c, &sources, dist->nr_cpus) {
1457                 if (vgic_queue_irq(vcpu, c, irq))
1458                         clear_bit(c, &sources);
1459         }
1460
1461         *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
1462
1463         /*
1464          * If the sources bitmap has been cleared it means that we
1465          * could queue all the SGIs onto link registers (see the
1466          * clear_bit above), and therefore we are done with them in
1467          * our emulated gic and can get rid of them.
1468          */
1469         if (!sources) {
1470                 vgic_dist_irq_clear_pending(vcpu, irq);
1471                 vgic_cpu_irq_clear(vcpu, irq);
1472                 return true;
1473         }
1474
1475         return false;
1476 }
1477
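/*
 * Queue a device interrupt on the CPU interface: edge IRQs drop their
 * pending state once queued, while level IRQs stay pending and are
 * marked as queued until the guest EOIs them.
 */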
1478 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1479 {
1480         if (!vgic_can_sample_irq(vcpu, irq))
1481                 return true; /* level interrupt, already queued */
1482
1483         if (vgic_queue_irq(vcpu, 0, irq)) {
1484                 if (vgic_irq_is_edge(vcpu, irq)) {
1485                         vgic_dist_irq_clear_pending(vcpu, irq);
1486                         vgic_cpu_irq_clear(vcpu, irq);
1487                 } else {
1488                         vgic_irq_set_queued(vcpu, irq);
1489                 }
1490
1491                 return true;
1492         }
1493
1494         return false;
1495 }
1496
1497 /*
1498  * Fill the list registers with pending interrupts before running the
1499  * guest.
1500  */
1501 static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1502 {
1503         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1504         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1505         int i, vcpu_id;
1506         int overflow = 0;
1507
1508         vcpu_id = vcpu->vcpu_id;
1509
1510         /*
1511          * We may not have any pending interrupt, or the interrupts
1512          * may have been serviced from another vcpu. In all cases,
1513          * move along.
1514          */
1515         if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
1516                 pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
1517                 goto epilog;
1518         }
1519
1520         /* SGIs */
1521         for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
1522                 if (!queue_sgi(vcpu, i))
1523                         overflow = 1;
1524         }
1525
1526         /* PPIs */
1527         for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
1528                 if (!vgic_queue_hwirq(vcpu, i))
1529                         overflow = 1;
1530         }
1531
1532         /* SPIs */
1533         for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
1534                 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1535                         overflow = 1;
1536         }
1537
1538 epilog:
1539         if (overflow) {
1540                 vgic_enable_underflow(vcpu);
1541         } else {
1542                 vgic_disable_underflow(vcpu);
1543                 /*
1544                  * We're about to run this VCPU, and we've consumed
1545                  * everything the distributor had in store for
1546                  * us. Claim we don't have anything pending. We'll
1547                  * adjust that if needed while exiting.
1548                  */
1549                 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1550         }
1551 }
1552
1553 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1554 {
1555         u32 status = vgic_get_interrupt_status(vcpu);
1556         bool level_pending = false;
1557
1558         kvm_debug("STATUS = %08x\n", status);
1559
1560         if (status & INT_STATUS_EOI) {
1561                 /*
1562                  * Some level interrupts have been EOIed. Clear their
1563                  * active bit.
1564                  */
1565                 u64 eisr = vgic_get_eisr(vcpu);
1566                 unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
1567                 int lr;
1568
1569                 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1570                         struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1571                         WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1572
1573                         vgic_irq_clear_queued(vcpu, vlr.irq);
1574                         WARN_ON(vlr.state & LR_STATE_MASK);
1575                         vlr.state = 0;
1576                         vgic_set_lr(vcpu, lr, vlr);
1577
1578                         /*
1579                          * If the IRQ was EOIed it was also ACKed and we
1580                          * therefore assume we can clear the soft pending
1581                          * state (should it have been set) for this interrupt.
1582                          *
1583                          * Note: if the IRQ soft pending state was set after
1584                          * the IRQ was acked, it actually shouldn't be
1585                          * cleared, but we have no way of knowing that unless
1586                          * we start trapping ACKs when the soft-pending state
1587                          * is set.
1588                          */
1589                         vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1590
1591                         /* Any additional pending interrupt? */
1592                         if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1593                                 vgic_cpu_irq_set(vcpu, vlr.irq);
1594                                 level_pending = true;
1595                         } else {
1596                                 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1597                                 vgic_cpu_irq_clear(vcpu, vlr.irq);
1598                         }
1599
1600                         /*
1601                          * Despite being EOIed, the LR may not have
1602                          * been marked as empty.
1603                          */
1604                         vgic_sync_lr_elrsr(vcpu, lr, vlr);
1605                 }
1606         }
1607
1608         if (status & INT_STATUS_UNDERFLOW)
1609                 vgic_disable_underflow(vcpu);
1610
1611         return level_pending;
1612 }
1613
1614 /*
1615  * Sync back the VGIC state after a guest run. The distributor lock is
1616  * needed so we don't get preempted in the middle of the state processing.
1617  */
1618 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1619 {
1620         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1621         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1622         u64 elrsr;
1623         unsigned long *elrsr_ptr;
1624         int lr, pending;
1625         bool level_pending;
1626
1627         level_pending = vgic_process_maintenance(vcpu);
1628         elrsr = vgic_get_elrsr(vcpu);
1629         elrsr_ptr = u64_to_bitmask(&elrsr);
1630
1631         /* Clear mappings for empty LRs */
1632         for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
1633                 struct vgic_lr vlr;
1634
1635                 if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
1636                         continue;
1637
1638                 vlr = vgic_get_lr(vcpu, lr);
1639
1640                 BUG_ON(vlr.irq >= dist->nr_irqs);
1641                 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1642         }
1643
1644         /* Check if we still have something up our sleeve... */
1645         pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1646         if (level_pending || pending < vgic->nr_lr)
1647                 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1648 }
1649
1650 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1651 {
1652         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1653
1654         if (!irqchip_in_kernel(vcpu->kvm))
1655                 return;
1656
1657         spin_lock(&dist->lock);
1658         __kvm_vgic_flush_hwstate(vcpu);
1659         spin_unlock(&dist->lock);
1660 }
1661
1662 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1663 {
1664         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1665
1666         if (!irqchip_in_kernel(vcpu->kvm))
1667                 return;
1668
1669         spin_lock(&dist->lock);
1670         __kvm_vgic_sync_hwstate(vcpu);
1671         spin_unlock(&dist->lock);
1672 }
1673
1674 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1675 {
1676         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1677
1678         if (!irqchip_in_kernel(vcpu->kvm))
1679                 return 0;
1680
1681         return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1682 }
1683
1684 static void vgic_kick_vcpus(struct kvm *kvm)
1685 {
1686         struct kvm_vcpu *vcpu;
1687         int c;
1688
1689         /*
1690          * We've injected an interrupt, time to find out who deserves
1691          * a good kick...
1692          */
1693         kvm_for_each_vcpu(c, vcpu, kvm) {
1694                 if (kvm_vgic_vcpu_pending_irq(vcpu))
1695                         kvm_vcpu_kick(vcpu);
1696         }
1697 }
1698
1699 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1700 {
1701         int edge_triggered = vgic_irq_is_edge(vcpu, irq);
1702
1703         /*
1704          * Only inject an interrupt if:
1705          * - edge triggered and we have a rising edge
1706          * - level triggered and we change level
1707          */
1708         if (edge_triggered) {
1709                 int state = vgic_dist_irq_is_pending(vcpu, irq);
1710                 return level > state;
1711         } else {
1712                 int state = vgic_dist_irq_get_level(vcpu, irq);
1713                 return level != state;
1714         }
1715 }
1716
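/*
 * Worked example for vgic_validate_injection() above (illustrative
 * only): an edge-triggered IRQ that is already pending (state == 1)
 * ignores a further level == 1 because "level > state" is false,
 * while a level-triggered IRQ whose recorded line level is 0 treats
 * level == 1 as a change and lets the injection go ahead.
 */
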
1717 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1718                                   unsigned int irq_num, bool level)
1719 {
1720         struct vgic_dist *dist = &kvm->arch.vgic;
1721         struct kvm_vcpu *vcpu;
1722         int edge_triggered, level_triggered;
1723         int enabled;
1724         bool ret = true;
1725
1726         spin_lock(&dist->lock);
1727
1728         vcpu = kvm_get_vcpu(kvm, cpuid);
1729         edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
1730         level_triggered = !edge_triggered;
1731
1732         if (!vgic_validate_injection(vcpu, irq_num, level)) {
1733                 ret = false;
1734                 goto out;
1735         }
1736
1737         if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1738                 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1739                 vcpu = kvm_get_vcpu(kvm, cpuid);
1740         }
1741
1742         kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1743
1744         if (level) {
1745                 if (level_triggered)
1746                         vgic_dist_irq_set_level(vcpu, irq_num);
1747                 vgic_dist_irq_set_pending(vcpu, irq_num);
1748         } else {
1749                 if (level_triggered) {
1750                         vgic_dist_irq_clear_level(vcpu, irq_num);
1751                         if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
1752                                 vgic_dist_irq_clear_pending(vcpu, irq_num);
1753                 }
1754
1755                 ret = false;
1756                 goto out;
1757         }
1758
1759         enabled = vgic_irq_is_enabled(vcpu, irq_num);
1760
1761         if (!enabled) {
1762                 ret = false;
1763                 goto out;
1764         }
1765
1766         if (!vgic_can_sample_irq(vcpu, irq_num)) {
1767                 /*
1768                  * Level interrupt in progress, will be picked up
1769                  * when EOIed.
1770                  */
1771                 ret = false;
1772                 goto out;
1773         }
1774
1775         if (level) {
1776                 vgic_cpu_irq_set(vcpu, irq_num);
1777                 set_bit(cpuid, dist->irq_pending_on_cpu);
1778         }
1779
1780 out:
1781         spin_unlock(&dist->lock);
1782
1783         return ret ? cpuid : -EINVAL;
1784 }
1785
1786 /**
1787  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
1788  * @kvm:     The VM structure pointer
1789  * @cpuid:   The CPU for PPIs
1790  * @irq_num: The IRQ number that is assigned to the device
1791  * @level:   Edge-triggered:  true:  to trigger the interrupt
1792  *                            false: to ignore the call
1793  *           Level-sensitive: true:  activates an interrupt
1794  *                            false: deactivates an interrupt
1795  *
1796  * The GIC is not concerned with devices being active-LOW or active-HIGH for
1797  * level-sensitive interrupts.  You can think of the level parameter as 1
1798  * being HIGH and 0 being LOW and all devices being active-HIGH.
1799  */
1800 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1801                         bool level)
1802 {
1803         int ret = 0;
1804         int vcpu_id;
1805
1806         if (unlikely(!vgic_initialized(kvm))) {
1807                 /*
1808                  * We only provide the automatic initialization of the VGIC
1809                  * for the legacy case of a GICv2. Any other type must
1810                  * be explicitly initialized once set up with the respective
1811                  * KVM device call.
1812                  */
1813                 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
1814                         ret = -EBUSY;
1815                         goto out;
1816                 }
1817                 mutex_lock(&kvm->lock);
1818                 ret = vgic_init(kvm);
1819                 mutex_unlock(&kvm->lock);
1820
1821                 if (ret)
1822                         goto out;
1823         }
1824
1825         vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
1826         if (vcpu_id >= 0) {
1827                 /* kick the specified vcpu */
1828                 kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
1829         }
1830
1831 out:
1832         return ret;
1833 }
1834
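/*
 * Usage sketch for kvm_vgic_inject_irq() above (not part of this
 * file): an in-kernel device such as the arch timer raises its
 * level-sensitive PPI on a vcpu with something along the lines of
 *
 *	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ppi, true);
 *
 * and lowers the line again by passing false; 'timer_ppi' is a
 * hypothetical placeholder for the device's interrupt number.
 */
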
1835 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1836 {
1837         /*
1838          * We cannot rely on the vgic maintenance interrupt to be
1839          * delivered synchronously. This means we can only use it to
1840          * exit the VM, and we perform the handling of EOIed
1841          * interrupts on the exit path (see vgic_process_maintenance).
1842          */
1843         return IRQ_HANDLED;
1844 }
1845
1846 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1847 {
1848         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1849
1850         kfree(vgic_cpu->pending_shared);
1851         kfree(vgic_cpu->vgic_irq_lr_map);
1852         vgic_cpu->pending_shared = NULL;
1853         vgic_cpu->vgic_irq_lr_map = NULL;
1854 }
1855
1856 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1857 {
1858         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1859
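        /*
         * pending_shared needs one bit per shared interrupt (SPI),
         * hence the division by 8: with nr_irqs == 256, for example,
         * this works out to (256 - 32) / 8 == 28 bytes.
         */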
1860         int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
1861         vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1862         vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
1863
1864         if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
1865                 kvm_vgic_vcpu_destroy(vcpu);
1866                 return -ENOMEM;
1867         }
1868
1869         memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
1870
1871         /*
1872          * Store the number of LRs per vcpu, so we don't have to go
1873          * all the way to the distributor structure to find out. Only
1874          * assembly code should use this one.
1875          */
1876         vgic_cpu->nr_lr = vgic->nr_lr;
1877
1878         return 0;
1879 }
1880
1881 void kvm_vgic_destroy(struct kvm *kvm)
1882 {
1883         struct vgic_dist *dist = &kvm->arch.vgic;
1884         struct kvm_vcpu *vcpu;
1885         int i;
1886
1887         kvm_for_each_vcpu(i, vcpu, kvm)
1888                 kvm_vgic_vcpu_destroy(vcpu);
1889
1890         vgic_free_bitmap(&dist->irq_enabled);
1891         vgic_free_bitmap(&dist->irq_level);
1892         vgic_free_bitmap(&dist->irq_pending);
1893         vgic_free_bitmap(&dist->irq_soft_pend);
1894         vgic_free_bitmap(&dist->irq_queued);
1895         vgic_free_bitmap(&dist->irq_cfg);
1896         vgic_free_bytemap(&dist->irq_priority);
1897         if (dist->irq_spi_target) {
1898                 for (i = 0; i < dist->nr_cpus; i++)
1899                         vgic_free_bitmap(&dist->irq_spi_target[i]);
1900         }
1901         kfree(dist->irq_sgi_sources);
1902         kfree(dist->irq_spi_cpu);
1903         kfree(dist->irq_spi_target);
1904         kfree(dist->irq_pending_on_cpu);
1905         dist->irq_sgi_sources = NULL;
1906         dist->irq_spi_cpu = NULL;
1907         dist->irq_spi_target = NULL;
1908         dist->irq_pending_on_cpu = NULL;
1909         dist->nr_cpus = 0;
1910 }
1911
1912 static int vgic_v2_init_model(struct kvm *kvm)
1913 {
1914         int i;
1915
1916         for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
1917                 vgic_set_target_reg(kvm, 0, i);
1918
1919         return 0;
1920 }
1921
1922 /*
1923  * Allocate and initialize the various data structures. Must be called
1924  * with kvm->lock held!
1925  */
1926 static int vgic_init(struct kvm *kvm)
1927 {
1928         struct vgic_dist *dist = &kvm->arch.vgic;
1929         struct kvm_vcpu *vcpu;
1930         int nr_cpus, nr_irqs;
1931         int ret, i, vcpu_id;
1932
1933         if (vgic_initialized(kvm))
1934                 return 0;
1935
1936         nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
1937         if (!nr_cpus)           /* No vcpus? Can't be good... */
1938                 return -ENODEV;
1939
1940         /*
1941          * If nobody configured the number of interrupts, use the
1942          * legacy one.
1943          */
1944         if (!dist->nr_irqs)
1945                 dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
1946
1947         nr_irqs = dist->nr_irqs;
1948
1949         ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
1950         ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
1951         ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
1952         ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
1953         ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
1954         ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
1955         ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
1956
1957         if (ret)
1958                 goto out;
1959
1960         dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
1961         dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
1962         dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
1963                                        GFP_KERNEL);
1964         dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1965                                            GFP_KERNEL);
1966         if (!dist->irq_sgi_sources ||
1967             !dist->irq_spi_cpu ||
1968             !dist->irq_spi_target ||
1969             !dist->irq_pending_on_cpu) {
1970                 ret = -ENOMEM;
1971                 goto out;
1972         }
1973
1974         for (i = 0; i < nr_cpus; i++)
1975                 ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
1976                                         nr_cpus, nr_irqs);
1977
1978         if (ret)
1979                 goto out;
1980
1981         ret = kvm->arch.vgic.vm_ops.init_model(kvm);
1982         if (ret)
1983                 goto out;
1984
1985         kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
1986                 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
1987                 if (ret) {
1988                         kvm_err("VGIC: Failed to allocate vcpu memory\n");
1989                         break;
1990                 }
1991
1992                 for (i = 0; i < dist->nr_irqs; i++) {
1993                         if (i < VGIC_NR_PPIS)
1994                                 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1995                                                         vcpu->vcpu_id, i, 1);
1996                         if (i < VGIC_NR_PRIVATE_IRQS)
1997                                 vgic_bitmap_set_irq_val(&dist->irq_cfg,
1998                                                         vcpu->vcpu_id, i,
1999                                                         VGIC_CFG_EDGE);
2000                 }
2001
2002                 vgic_enable(vcpu);
2003         }
2004
2005 out:
2006         if (ret)
2007                 kvm_vgic_destroy(kvm);
2008
2009         return ret;
2010 }
2011
2012 /**
2013  * vgic_v2_map_resources - Configure global VGIC state before running any VCPUs
2014  * @kvm: pointer to the kvm struct
2015  *
2016  * Map the virtual CPU interface into the VM before running any VCPUs.  We
2017  * can't do this at creation time, because user space must first set the
2018  * virtual CPU interface address in the guest physical address space.
2019  */
2020 static int vgic_v2_map_resources(struct kvm *kvm,
2021                                  const struct vgic_params *params)
2022 {
2023         int ret = 0;
2024
2025         if (!irqchip_in_kernel(kvm))
2026                 return 0;
2027
2028         mutex_lock(&kvm->lock);
2029
2030         if (vgic_ready(kvm))
2031                 goto out;
2032
2033         if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
2034             IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
2035                 kvm_err("Need to set vgic cpu and dist addresses first\n");
2036                 ret = -ENXIO;
2037                 goto out;
2038         }
2039
2040         /*
2041          * Initialize the vgic if this hasn't already been done on demand by
2042          * accessing the vgic state from userspace.
2043          */
2044         ret = vgic_init(kvm);
2045         if (ret) {
2046                 kvm_err("Unable to allocate maps\n");
2047                 goto out;
2048         }
2049
2050         ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
2051                                     params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
2052                                     true);
2053         if (ret) {
2054                 kvm_err("Unable to remap VGIC CPU to VCPU\n");
2055                 goto out;
2056         }
2057
2058         kvm->arch.vgic.ready = true;
2059 out:
2060         if (ret)
2061                 kvm_vgic_destroy(kvm);
2062         mutex_unlock(&kvm->lock);
2063         return ret;
2064 }
2065
2066 static void vgic_v2_init_emulation(struct kvm *kvm)
2067 {
2068         struct vgic_dist *dist = &kvm->arch.vgic;
2069
2070         dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
2071         dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
2072         dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
2073         dist->vm_ops.init_model = vgic_v2_init_model;
2074         dist->vm_ops.map_resources = vgic_v2_map_resources;
2075 }
2076
2077 static int init_vgic_model(struct kvm *kvm, int type)
2078 {
2079         switch (type) {
2080         case KVM_DEV_TYPE_ARM_VGIC_V2:
2081                 vgic_v2_init_emulation(kvm);
2082                 break;
2083         default:
2084                 return -ENODEV;
2085         }
2086
2087         return 0;
2088 }
2089
2090 int kvm_vgic_create(struct kvm *kvm, u32 type)
2091 {
2092         int i, vcpu_lock_idx = -1, ret;
2093         struct kvm_vcpu *vcpu;
2094
2095         mutex_lock(&kvm->lock);
2096
2097         if (kvm->arch.vgic.vctrl_base) {
2098                 ret = -EEXIST;
2099                 goto out;
2100         }
2101
2102         /*
2103          * Any time a vcpu is run, vcpu_load is called which tries to grab the
2104          * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
2105          * that no other VCPUs are run while we create the vgic.
2106          */
2107         ret = -EBUSY;
2108         kvm_for_each_vcpu(i, vcpu, kvm) {
2109                 if (!mutex_trylock(&vcpu->mutex))
2110                         goto out_unlock;
2111                 vcpu_lock_idx = i;
2112         }
2113
2114         kvm_for_each_vcpu(i, vcpu, kvm) {
2115                 if (vcpu->arch.has_run_once)
2116                         goto out_unlock;
2117         }
2118         ret = 0;
2119
2120         ret = init_vgic_model(kvm, type);
2121         if (ret)
2122                 goto out_unlock;
2123
2124         spin_lock_init(&kvm->arch.vgic.lock);
2125         kvm->arch.vgic.in_kernel = true;
2126         kvm->arch.vgic.vgic_model = type;
2127         kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
2128         kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
2129         kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
2130
2131 out_unlock:
2132         for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
2133                 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
2134                 mutex_unlock(&vcpu->mutex);
2135         }
2136
2137 out:
2138         mutex_unlock(&kvm->lock);
2139         return ret;
2140 }
2141
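/*
 * Userspace sketch (not part of this file) of how kvm_vgic_create()
 * above is typically reached: the VMM creates the vgic through the
 * generic KVM device API before any vcpu has run, e.g.
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_V2 };
 *
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *
 * after which cd.fd refers to the new vgic device; 'vm_fd' is assumed
 * to be the VM file descriptor returned by KVM_CREATE_VM.
 */
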
2142 static int vgic_ioaddr_overlap(struct kvm *kvm)
2143 {
2144         phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
2145         phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
2146
2147         if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
2148                 return 0;
2149         if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
2150             (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
2151                 return -EBUSY;
2152         return 0;
2153 }
2154
2155 static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
2156                               phys_addr_t addr, phys_addr_t size)
2157 {
2158         int ret;
2159
2160         if (addr & ~KVM_PHYS_MASK)
2161                 return -E2BIG;
2162
2163         if (addr & (SZ_4K - 1))
2164                 return -EINVAL;
2165
2166         if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
2167                 return -EEXIST;
2168         if (addr + size < addr)
2169                 return -EINVAL;
2170
2171         *ioaddr = addr;
2172         ret = vgic_ioaddr_overlap(kvm);
2173         if (ret)
2174                 *ioaddr = VGIC_ADDR_UNDEF;
2175
2176         return ret;
2177 }
2178
2179 /**
2180  * kvm_vgic_addr - set or get vgic VM base addresses
2181  * @kvm:   pointer to the vm struct
2182  * @type:  the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
2183  * @addr:  pointer to address value
2184  * @write: if true set the address in the VM address space, if false read the
2185  *          address
2186  *
2187  * Set or get the vgic base addresses for the distributor and the virtual CPU
2188  * interface in the VM physical address space.  These addresses are properties
2189  * of the emulated core/SoC and therefore user space initially knows this
2190  * information.
2191  */
2192 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
2193 {
2194         int r = 0;
2195         struct vgic_dist *vgic = &kvm->arch.vgic;
2196
2197         mutex_lock(&kvm->lock);
2198         switch (type) {
2199         case KVM_VGIC_V2_ADDR_TYPE_DIST:
2200                 if (write) {
2201                         r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
2202                                                *addr, KVM_VGIC_V2_DIST_SIZE);
2203                 } else {
2204                         *addr = vgic->vgic_dist_base;
2205                 }
2206                 break;
2207         case KVM_VGIC_V2_ADDR_TYPE_CPU:
2208                 if (write) {
2209                         r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
2210                                                *addr, KVM_VGIC_V2_CPU_SIZE);
2211                 } else {
2212                         *addr = vgic->vgic_cpu_base;
2213                 }
2214                 break;
2215         default:
2216                 r = -ENODEV;
2217         }
2218
2219         mutex_unlock(&kvm->lock);
2220         return r;
2221 }
2222
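/*
 * Userspace sketch (not part of this file): the base addresses
 * handled by kvm_vgic_addr() above are written through the device
 * attribute interface, e.g. for the distributor:
 *
 *	__u64 dist_addr = 0x08000000;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
 *		.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
 *		.addr  = (__u64)&dist_addr,
 *	};
 *
 *	ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * where 'vgic_fd' is the device fd from KVM_CREATE_DEVICE and the
 * address itself is only an example value.
 */
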
2223 static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
2224                                  struct kvm_exit_mmio *mmio, phys_addr_t offset)
2225 {
2226         bool updated = false;
2227         struct vgic_vmcr vmcr;
2228         u32 *vmcr_field;
2229         u32 reg;
2230
2231         vgic_get_vmcr(vcpu, &vmcr);
2232
2233         switch (offset & ~0x3) {
2234         case GIC_CPU_CTRL:
2235                 vmcr_field = &vmcr.ctlr;
2236                 break;
2237         case GIC_CPU_PRIMASK:
2238                 vmcr_field = &vmcr.pmr;
2239                 break;
2240         case GIC_CPU_BINPOINT:
2241                 vmcr_field = &vmcr.bpr;
2242                 break;
2243         case GIC_CPU_ALIAS_BINPOINT:
2244                 vmcr_field = &vmcr.abpr;
2245                 break;
2246         default:
2247                 BUG();
2248         }
2249
2250         if (!mmio->is_write) {
2251                 reg = *vmcr_field;
2252                 mmio_data_write(mmio, ~0, reg);
2253         } else {
2254                 reg = mmio_data_read(mmio, ~0);
2255                 if (reg != *vmcr_field) {
2256                         *vmcr_field = reg;
2257                         vgic_set_vmcr(vcpu, &vmcr);
2258                         updated = true;
2259                 }
2260         }
2261         return updated;
2262 }
2263
2264 static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
2265                              struct kvm_exit_mmio *mmio, phys_addr_t offset)
2266 {
2267         return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
2268 }
2269
2270 static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
2271                                   struct kvm_exit_mmio *mmio,
2272                                   phys_addr_t offset)
2273 {
2274         u32 reg;
2275
2276         if (mmio->is_write)
2277                 return false;
2278
2279         /* GICC_IIDR */
2280         reg = (PRODUCT_ID_KVM << 20) |
2281               (GICC_ARCH_VERSION_V2 << 16) |
2282               (IMPLEMENTER_ARM << 0);
2283         mmio_data_write(mmio, ~0, reg);
2284         return false;
2285 }
2286
2287 /*
2288  * CPU Interface Register accesses - these are not accessed by the VM, but by
2289  * user space for saving and restoring VGIC state.
2290  */
2291 static const struct mmio_range vgic_cpu_ranges[] = {
2292         {
2293                 .base           = GIC_CPU_CTRL,
2294                 .len            = 12,
2295                 .handle_mmio    = handle_cpu_mmio_misc,
2296         },
2297         {
2298                 .base           = GIC_CPU_ALIAS_BINPOINT,
2299                 .len            = 4,
2300                 .handle_mmio    = handle_mmio_abpr,
2301         },
2302         {
2303                 .base           = GIC_CPU_ACTIVEPRIO,
2304                 .len            = 16,
2305                 .handle_mmio    = handle_mmio_raz_wi,
2306         },
2307         {
2308                 .base           = GIC_CPU_IDENT,
2309                 .len            = 4,
2310                 .handle_mmio    = handle_cpu_mmio_ident,
2311         },
2312 };
2313
2314 static int vgic_attr_regs_access(struct kvm_device *dev,
2315                                  struct kvm_device_attr *attr,
2316                                  u32 *reg, bool is_write)
2317 {
2318         const struct mmio_range *r = NULL, *ranges;
2319         phys_addr_t offset;
2320         int ret, cpuid, c;
2321         struct kvm_vcpu *vcpu, *tmp_vcpu;
2322         struct vgic_dist *vgic;
2323         struct kvm_exit_mmio mmio;
2324
2325         offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2326         cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
2327                 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
2328
2329         mutex_lock(&dev->kvm->lock);
2330
2331         ret = vgic_init(dev->kvm);
2332         if (ret)
2333                 goto out;
2334
2335         if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
2336                 ret = -EINVAL;
2337                 goto out;
2338         }
2339
2340         vcpu = kvm_get_vcpu(dev->kvm, cpuid);
2341         vgic = &dev->kvm->arch.vgic;
2342
2343         mmio.len = 4;
2344         mmio.is_write = is_write;
2345         if (is_write)
2346                 mmio_data_write(&mmio, ~0, *reg);
2347         switch (attr->group) {
2348         case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2349                 mmio.phys_addr = vgic->vgic_dist_base + offset;
2350                 ranges = vgic_dist_ranges;
2351                 break;
2352         case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2353                 mmio.phys_addr = vgic->vgic_cpu_base + offset;
2354                 ranges = vgic_cpu_ranges;
2355                 break;
2356         default:
2357                 BUG();
2358         }
2359         r = find_matching_range(ranges, &mmio, offset);
2360
2361         if (unlikely(!r || !r->handle_mmio)) {
2362                 ret = -ENXIO;
2363                 goto out;
2364         }
2365
2366
2367         spin_lock(&vgic->lock);
2368
2369         /*
2370          * Ensure that no other VCPU is running by checking the vcpu->cpu
2371          * field.  If no other VCPUs are running we can safely access the VGIC
2372          * state, because even if another VCPU is run after this point, that
2373          * VCPU will not touch the vgic state, because it will block on
2374          * getting the vgic->lock in kvm_vgic_sync_hwstate().
2375          */
2376         kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
2377                 if (unlikely(tmp_vcpu->cpu != -1)) {
2378                         ret = -EBUSY;
2379                         goto out_vgic_unlock;
2380                 }
2381         }
2382
2383         /*
2384          * Move all pending IRQs from the LRs on all VCPUs so the pending
2385          * state can be properly represented in the register state accessible
2386          * through this API.
2387          */
2388         kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
2389                 vgic_unqueue_irqs(tmp_vcpu);
2390
2391         offset -= r->base;
2392         r->handle_mmio(vcpu, &mmio, offset);
2393
2394         if (!is_write)
2395                 *reg = mmio_data_read(&mmio, ~0);
2396
2397         ret = 0;
2398 out_vgic_unlock:
2399         spin_unlock(&vgic->lock);
2400 out:
2401         mutex_unlock(&dev->kvm->lock);
2402         return ret;
2403 }
2404
2405 static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2406 {
2407         int r;
2408
2409         switch (attr->group) {
2410         case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2411                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2412                 u64 addr;
2413                 unsigned long type = (unsigned long)attr->attr;
2414
2415                 if (copy_from_user(&addr, uaddr, sizeof(addr)))
2416                         return -EFAULT;
2417
2418                 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
2419                 return (r == -ENODEV) ? -ENXIO : r;
2420         }
2421
2422         case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2423         case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
2424                 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2425                 u32 reg;
2426
2427                 if (get_user(reg, uaddr))
2428                         return -EFAULT;
2429
2430                 return vgic_attr_regs_access(dev, attr, &reg, true);
2431         }
2432         case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2433                 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2434                 u32 val;
2435                 int ret = 0;
2436
2437                 if (get_user(val, uaddr))
2438                         return -EFAULT;
2439
2440                 /*
2441                  * We require:
2442                  * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
2443                  * - at most 1024 interrupts
2444                  * - a multiple of 32 interrupts
2445                  */
2446                 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
2447                     val > VGIC_MAX_IRQS ||
2448                     (val & 31))
2449                         return -EINVAL;
2450
2451                 mutex_lock(&dev->kvm->lock);
2452
2453                 if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2454                         ret = -EBUSY;
2455                 else
2456                         dev->kvm->arch.vgic.nr_irqs = val;
2457
2458                 mutex_unlock(&dev->kvm->lock);
2459
2460                 return ret;
2461         }
2462         case KVM_DEV_ARM_VGIC_GRP_CTRL: {
2463                 switch (attr->attr) {
2464                 case KVM_DEV_ARM_VGIC_CTRL_INIT:
2465                         r = vgic_init(dev->kvm);
2466                         return r;
2467                 }
2468                 break;
2469         }
2470         }
2471
2472         return -ENXIO;
2473 }
2474
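/*
 * Userspace sketch (not part of this file) for the NR_IRQS group
 * handled in vgic_set_attr() above: a VMM wanting 96 SPIs configures
 * 128 interrupts in total (16 SGIs + 16 PPIs + 96 SPIs, a multiple
 * of 32) before the vgic is initialized, e.g.
 *
 *	__u32 nr_irqs = 128;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
 *		.addr  = (__u64)&nr_irqs,
 *	};
 *
 *	ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * where 'vgic_fd' is assumed to be the vgic device fd.
 */
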
2475 static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2476 {
2477         int r = -ENXIO;
2478
2479         switch (attr->group) {
2480         case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2481                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2482                 u64 addr;
2483                 unsigned long type = (unsigned long)attr->attr;
2484
2485                 r = kvm_vgic_addr(dev->kvm, type, &addr, false);
2486                 if (r)
2487                         return (r == -ENODEV) ? -ENXIO : r;
2488
2489                 if (copy_to_user(uaddr, &addr, sizeof(addr)))
2490                         return -EFAULT;
2491                 break;
2492         }
2493
2494         case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2495         case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
2496                 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2497                 u32 reg = 0;
2498
2499                 r = vgic_attr_regs_access(dev, attr, &reg, false);
2500                 if (r)
2501                         return r;
2502                 r = put_user(reg, uaddr);
2503                 break;
2504         }
2505         case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2506                 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2507                 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
2508                 break;
2509         }
2510
2511         }
2512
2513         return r;
2514 }
2515
2516 static int vgic_has_attr_regs(const struct mmio_range *ranges,
2517                               phys_addr_t offset)
2518 {
2519         struct kvm_exit_mmio dev_attr_mmio;
2520
2521         dev_attr_mmio.len = 4;
2522         if (find_matching_range(ranges, &dev_attr_mmio, offset))
2523                 return 0;
2524         else
2525                 return -ENXIO;
2526 }
2527
2528 static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2529 {
2530         phys_addr_t offset;
2531
2532         switch (attr->group) {
2533         case KVM_DEV_ARM_VGIC_GRP_ADDR:
2534                 switch (attr->attr) {
2535                 case KVM_VGIC_V2_ADDR_TYPE_DIST:
2536                 case KVM_VGIC_V2_ADDR_TYPE_CPU:
2537                         return 0;
2538                 }
2539                 break;
2540         case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2541                 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2542                 return vgic_has_attr_regs(vgic_dist_ranges, offset);
2543         case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2544                 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2545                 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
2546         case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
2547                 return 0;
2548         case KVM_DEV_ARM_VGIC_GRP_CTRL:
2549                 switch (attr->attr) {
2550                 case KVM_DEV_ARM_VGIC_CTRL_INIT:
2551                         return 0;
2552                 }
2553         }
2554         return -ENXIO;
2555 }
2556
2557 static void vgic_destroy(struct kvm_device *dev)
2558 {
2559         kfree(dev);
2560 }
2561
2562 static int vgic_create(struct kvm_device *dev, u32 type)
2563 {
2564         return kvm_vgic_create(dev->kvm, type);
2565 }
2566
2567 static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2568         .name = "kvm-arm-vgic",
2569         .create = vgic_create,
2570         .destroy = vgic_destroy,
2571         .set_attr = vgic_set_attr,
2572         .get_attr = vgic_get_attr,
2573         .has_attr = vgic_has_attr,
2574 };
2575
2576 static void vgic_init_maintenance_interrupt(void *info)
2577 {
2578         enable_percpu_irq(vgic->maint_irq, 0);
2579 }
2580
2581 static int vgic_cpu_notify(struct notifier_block *self,
2582                            unsigned long action, void *cpu)
2583 {
2584         switch (action) {
2585         case CPU_STARTING:
2586         case CPU_STARTING_FROZEN:
2587                 vgic_init_maintenance_interrupt(NULL);
2588                 break;
2589         case CPU_DYING:
2590         case CPU_DYING_FROZEN:
2591                 disable_percpu_irq(vgic->maint_irq);
2592                 break;
2593         }
2594
2595         return NOTIFY_OK;
2596 }
2597
2598 static struct notifier_block vgic_cpu_nb = {
2599         .notifier_call = vgic_cpu_notify,
2600 };
2601
2602 static const struct of_device_id vgic_ids[] = {
2603         { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2604         { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2605         {},
2606 };
2607
2608 int kvm_vgic_hyp_init(void)
2609 {
2610         const struct of_device_id *matched_id;
2611         const int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
2612                                 const struct vgic_params **);
2613         struct device_node *vgic_node;
2614         int ret;
2615
2616         vgic_node = of_find_matching_node_and_match(NULL,
2617                                                     vgic_ids, &matched_id);
2618         if (!vgic_node) {
2619                 kvm_err("error: no compatible GIC node found\n");
2620                 return -ENODEV;
2621         }
2622
2623         vgic_probe = matched_id->data;
2624         ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
2625         if (ret)
2626                 return ret;
2627
2628         ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2629                                  "vgic", kvm_get_running_vcpus());
2630         if (ret) {
2631                 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2632                 return ret;
2633         }
2634
2635         ret = __register_cpu_notifier(&vgic_cpu_nb);
2636         if (ret) {
2637                 kvm_err("Cannot register vgic CPU notifier\n");
2638                 goto out_free_irq;
2639         }
2640
2641         /* Callback into arch code for setup */
2642         vgic_arch_setup(vgic);
2643
2644         on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2645
2646         return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
2647                                        KVM_DEV_TYPE_ARM_VGIC_V2);
2648
2649 out_free_irq:
2650         free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2651         return ret;
2652 }