drivers/gpu/drm/i915/gvt/kvmgt.c
1 /*
2  * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  *    Eddie Dong <eddie.dong@intel.com>
30  *
31  * Contributors:
32  *    Niu Bing <bing.niu@intel.com>
33  *    Zhi Wang <zhi.a.wang@intel.com>
34  */
35
36 #include <linux/init.h>
37 #include <linux/mm.h>
38 #include <linux/kthread.h>
39 #include <linux/sched/mm.h>
40 #include <linux/types.h>
41 #include <linux/list.h>
42 #include <linux/rbtree.h>
43 #include <linux/spinlock.h>
44 #include <linux/eventfd.h>
45 #include <linux/mdev.h>
46 #include <linux/debugfs.h>
47
48 #include <linux/nospec.h>
49
50 #include <drm/drm_edid.h>
51
52 #include "i915_drv.h"
53 #include "intel_gvt.h"
54 #include "gvt.h"
55
56 MODULE_IMPORT_NS(DMA_BUF);
57 MODULE_IMPORT_NS(I915_GVT);
58
59 /* helper macros copied from vfio-pci */
60 #define VFIO_PCI_OFFSET_SHIFT   40
61 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
62 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
63 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
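/*
 * Example: for an access at
 * *ppos = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR2_REGION_INDEX) + 0x100,
 * VFIO_PCI_OFFSET_TO_INDEX(*ppos) gives back the BAR2 region index and
 * (*ppos & VFIO_PCI_OFFSET_MASK) gives the region-relative offset 0x100.
 */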
64
65 #define EDID_BLOB_OFFSET (PAGE_SIZE/2)
66
67 #define OPREGION_SIGNATURE "IntelGraphicsMem"
68
69 struct vfio_region;
70 struct intel_vgpu_regops {
71         size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
72                         size_t count, loff_t *ppos, bool iswrite);
73         void (*release)(struct intel_vgpu *vgpu,
74                         struct vfio_region *region);
75 };
76
77 struct vfio_region {
78         u32                             type;
79         u32                             subtype;
80         size_t                          size;
81         u32                             flags;
82         const struct intel_vgpu_regops  *ops;
83         void                            *data;
84 };
85
86 struct vfio_edid_region {
87         struct vfio_region_gfx_edid vfio_edid_regs;
88         void *edid_blob;
89 };
90
91 struct kvmgt_pgfn {
92         gfn_t gfn;
93         struct hlist_node hnode;
94 };
95
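/*
 * One entry of the per-vgpu DMA cache: a pinned guest page range (gfn, size),
 * the bus address it is mapped to, and links into both rbtree caches.
 */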
96 struct gvt_dma {
97         struct intel_vgpu *vgpu;
98         struct rb_node gfn_node;
99         struct rb_node dma_addr_node;
100         gfn_t gfn;
101         dma_addr_t dma_addr;
102         unsigned long size;
103         struct kref ref;
104 };
105
106 #define vfio_dev_to_vgpu(vfio_dev) \
107         container_of((vfio_dev), struct intel_vgpu, vfio_device)
108
109 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
110                 const u8 *val, int len,
111                 struct kvm_page_track_notifier_node *node);
112 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
113                 struct kvm_memory_slot *slot,
114                 struct kvm_page_track_notifier_node *node);
115
116 static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
117 {
118         struct intel_vgpu_type *type =
119                 container_of(mtype, struct intel_vgpu_type, type);
120
121         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
122                        "fence: %d\nresolution: %s\n"
123                        "weight: %d\n",
124                        BYTES_TO_MB(type->conf->low_mm),
125                        BYTES_TO_MB(type->conf->high_mm),
126                        type->conf->fence, vgpu_edid_str(type->conf->edid),
127                        type->conf->weight);
128 }
129
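/* Drop the VFIO pin on the guest pages backing 'size' bytes at 'gfn'. */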
130 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
131                 unsigned long size)
132 {
133         vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT,
134                          DIV_ROUND_UP(size, PAGE_SIZE));
135 }
136
137 /* Pin a normal or compound guest page for dma. */
138 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
139                 unsigned long size, struct page **page)
140 {
141         int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
142         struct page *base_page = NULL;
143         int npage;
144         int ret;
145
146         /*
147          * We pin the pages one by one to avoid allocating a big array
148          * on the stack to hold pfns.
149          */
150         for (npage = 0; npage < total_pages; npage++) {
151                 dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
152                 struct page *cur_page;
153
154                 ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
155                                      IOMMU_READ | IOMMU_WRITE, &cur_page);
156                 if (ret != 1) {
157                         gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
158                                      &cur_iova, ret);
159                         goto err;
160                 }
161
162                 if (npage == 0)
163                         base_page = cur_page;
164                 else if (base_page + npage != cur_page) {
165                         gvt_vgpu_err("The pages are not contiguous\n");
166                         ret = -EINVAL;
167                         npage++;
168                         goto err;
169                 }
170         }
171
172         *page = base_page;
173         return 0;
174 err:
175         gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
176         return ret;
177 }
178
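/*
 * Pin the guest pages at 'gfn' and set up a DMA mapping of 'size' bytes.
 * The bus address is returned in '*dma_addr'; the pages are unpinned again
 * if the mapping fails.
 */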
179 static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
180                 dma_addr_t *dma_addr, unsigned long size)
181 {
182         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
183         struct page *page = NULL;
184         int ret;
185
186         ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
187         if (ret)
188                 return ret;
189
190         /* Setup DMA mapping. */
191         *dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL);
192         if (dma_mapping_error(dev, *dma_addr)) {
193                 gvt_vgpu_err("DMA mapping failed for pfn 0x%lx\n",
194                              page_to_pfn(page));
195                 gvt_unpin_guest_page(vgpu, gfn, size);
196                 return -ENOMEM;
197         }
198
199         return 0;
200 }
201
202 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
203                 dma_addr_t dma_addr, unsigned long size)
204 {
205         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
206
207         dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL);
208         gvt_unpin_guest_page(vgpu, gfn, size);
209 }
210
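/*
 * Cached mappings are indexed twice: dma_addr_cache is keyed by bus address
 * and gfn_cache by guest frame number, both pointing at the same gvt_dma
 * entries.
 */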
211 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
212                 dma_addr_t dma_addr)
213 {
214         struct rb_node *node = vgpu->dma_addr_cache.rb_node;
215         struct gvt_dma *itr;
216
217         while (node) {
218                 itr = rb_entry(node, struct gvt_dma, dma_addr_node);
219
220                 if (dma_addr < itr->dma_addr)
221                         node = node->rb_left;
222                 else if (dma_addr > itr->dma_addr)
223                         node = node->rb_right;
224                 else
225                         return itr;
226         }
227         return NULL;
228 }
229
230 static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
231 {
232         struct rb_node *node = vgpu->gfn_cache.rb_node;
233         struct gvt_dma *itr;
234
235         while (node) {
236                 itr = rb_entry(node, struct gvt_dma, gfn_node);
237
238                 if (gfn < itr->gfn)
239                         node = node->rb_left;
240                 else if (gfn > itr->gfn)
241                         node = node->rb_right;
242                 else
243                         return itr;
244         }
245         return NULL;
246 }
247
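/*
 * Record a new gfn -> dma_addr mapping: allocate a gvt_dma entry and insert
 * it into both the gfn-keyed and the dma_addr-keyed rbtree.
 */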
248 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
249                 dma_addr_t dma_addr, unsigned long size)
250 {
251         struct gvt_dma *new, *itr;
252         struct rb_node **link, *parent = NULL;
253
254         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
255         if (!new)
256                 return -ENOMEM;
257
258         new->vgpu = vgpu;
259         new->gfn = gfn;
260         new->dma_addr = dma_addr;
261         new->size = size;
262         kref_init(&new->ref);
263
264         /* gfn_cache maps gfn to struct gvt_dma. */
265         link = &vgpu->gfn_cache.rb_node;
266         while (*link) {
267                 parent = *link;
268                 itr = rb_entry(parent, struct gvt_dma, gfn_node);
269
270                 if (gfn < itr->gfn)
271                         link = &parent->rb_left;
272                 else
273                         link = &parent->rb_right;
274         }
275         rb_link_node(&new->gfn_node, parent, link);
276         rb_insert_color(&new->gfn_node, &vgpu->gfn_cache);
277
278         /* dma_addr_cache maps dma addr to struct gvt_dma. */
279         parent = NULL;
280         link = &vgpu->dma_addr_cache.rb_node;
281         while (*link) {
282                 parent = *link;
283                 itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
284
285                 if (dma_addr < itr->dma_addr)
286                         link = &parent->rb_left;
287                 else
288                         link = &parent->rb_right;
289         }
290         rb_link_node(&new->dma_addr_node, parent, link);
291         rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache);
292
293         vgpu->nr_cache_entries++;
294         return 0;
295 }
296
297 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
298                                 struct gvt_dma *entry)
299 {
300         rb_erase(&entry->gfn_node, &vgpu->gfn_cache);
301         rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache);
302         kfree(entry);
303         vgpu->nr_cache_entries--;
304 }
305
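/*
 * Tear down the whole DMA cache: unmap, unpin and free entries until the
 * gfn rbtree is empty, re-taking cache_lock for each entry.
 */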
306 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
307 {
308         struct gvt_dma *dma;
309         struct rb_node *node = NULL;
310
311         for (;;) {
312                 mutex_lock(&vgpu->cache_lock);
313                 node = rb_first(&vgpu->gfn_cache);
314                 if (!node) {
315                         mutex_unlock(&vgpu->cache_lock);
316                         break;
317                 }
318                 dma = rb_entry(node, struct gvt_dma, gfn_node);
319                 gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
320                 __gvt_cache_remove_entry(vgpu, dma);
321                 mutex_unlock(&vgpu->cache_lock);
322         }
323 }
324
325 static void gvt_cache_init(struct intel_vgpu *vgpu)
326 {
327         vgpu->gfn_cache = RB_ROOT;
328         vgpu->dma_addr_cache = RB_ROOT;
329         vgpu->nr_cache_entries = 0;
330         mutex_init(&vgpu->cache_lock);
331 }
332
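/*
 * The protect table is a hash of the gfns this vgpu treats as
 * write-protected (see kvmgt_gfn_is_write_protected()).
 */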
333 static void kvmgt_protect_table_init(struct intel_vgpu *info)
334 {
335         hash_init(info->ptable);
336 }
337
338 static void kvmgt_protect_table_destroy(struct intel_vgpu *info)
339 {
340         struct kvmgt_pgfn *p;
341         struct hlist_node *tmp;
342         int i;
343
344         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
345                 hash_del(&p->hnode);
346                 kfree(p);
347         }
348 }
349
350 static struct kvmgt_pgfn *
351 __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
352 {
353         struct kvmgt_pgfn *p, *res = NULL;
354
355         hash_for_each_possible(info->ptable, p, hnode, gfn) {
356                 if (gfn == p->gfn) {
357                         res = p;
358                         break;
359                 }
360         }
361
362         return res;
363 }
364
365 static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn)
366 {
367         struct kvmgt_pgfn *p;
368
369         p = __kvmgt_protect_table_find(info, gfn);
370         return !!p;
371 }
372
373 static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn)
374 {
375         struct kvmgt_pgfn *p;
376
377         if (kvmgt_gfn_is_write_protected(info, gfn))
378                 return;
379
380         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
381         if (WARN(!p, "gfn: 0x%llx\n", gfn))
382                 return;
383
384         p->gfn = gfn;
385         hash_add(info->ptable, &p->hnode, gfn);
386 }
387
388 static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn)
389 {
390         struct kvmgt_pgfn *p;
391
392         p = __kvmgt_protect_table_find(info, gfn);
393         if (p) {
394                 hash_del(&p->hnode);
395                 kfree(p);
396         }
397 }
398
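/*
 * rw handler for the IGD OpRegion region: reads are served from the vgpu's
 * own opregion copy, writes are rejected.
 */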
399 static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
400                 size_t count, loff_t *ppos, bool iswrite)
401 {
402         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
403                         VFIO_PCI_NUM_REGIONS;
404         void *base = vgpu->region[i].data;
405         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
406
408         if (pos >= vgpu->region[i].size || iswrite) {
409                 gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
410                 return -EINVAL;
411         }
412         count = min(count, (size_t)(vgpu->region[i].size - pos));
413         memcpy(buf, base + pos, count);
414
415         return count;
416 }
417
418 static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
419                 struct vfio_region *region)
420 {
421 }
422
423 static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
424         .rw = intel_vgpu_reg_rw_opregion,
425         .release = intel_vgpu_reg_release_opregion,
426 };
427
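/*
 * Emulate accesses to the vfio_region_gfx_edid control registers. Only whole
 * 4-byte accesses are accepted; a write to link_state triggers a virtual
 * hotplug and a write to edid_size sets the valid size of the EDID blob.
 */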
428 static int handle_edid_regs(struct intel_vgpu *vgpu,
429                         struct vfio_edid_region *region, char *buf,
430                         size_t count, u16 offset, bool is_write)
431 {
432         struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
433         unsigned int data;
434
435         if (offset + count > sizeof(*regs))
436                 return -EINVAL;
437
438         if (count != 4)
439                 return -EINVAL;
440
441         if (is_write) {
442                 data = *((unsigned int *)buf);
443                 switch (offset) {
444                 case offsetof(struct vfio_region_gfx_edid, link_state):
445                         if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
446                                 if (!drm_edid_block_valid(
447                                         (u8 *)region->edid_blob,
448                                         0,
449                                         true,
450                                         NULL)) {
451                                         gvt_vgpu_err("invalid EDID blob\n");
452                                         return -EINVAL;
453                                 }
454                                 intel_vgpu_emulate_hotplug(vgpu, true);
455                         } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) {
456                                 intel_vgpu_emulate_hotplug(vgpu, false);
457                         } else {
458                                 gvt_vgpu_err("invalid EDID link state %d\n",
459                                         data);
460                                 return -EINVAL;
461                         }
462                         regs->link_state = data;
463                         break;
464                 case offsetof(struct vfio_region_gfx_edid, edid_size):
465                         if (data > regs->edid_max_size) {
466                                 gvt_vgpu_err("EDID size is bigger than %d!\n",
467                                         regs->edid_max_size);
468                                 return -EINVAL;
469                         }
470                         regs->edid_size = data;
471                         break;
472                 default:
473                         /* read-only regs */
474                         gvt_vgpu_err("write to read-only EDID region at offset %d\n",
475                                 offset);
476                         return -EPERM;
477                 }
478         } else {
479                 memcpy(buf, (char *)regs + offset, count);
480         }
481
482         return count;
483 }
484
485 static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
486                         size_t count, u16 offset, bool is_write)
487 {
488         if (offset + count > region->vfio_edid_regs.edid_size)
489                 return -EINVAL;
490
491         if (is_write)
492                 memcpy(region->edid_blob + offset, buf, count);
493         else
494                 memcpy(buf, region->edid_blob + offset, count);
495
496         return count;
497 }
498
499 static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
500                 size_t count, loff_t *ppos, bool iswrite)
501 {
502         int ret;
503         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
504                         VFIO_PCI_NUM_REGIONS;
505         struct vfio_edid_region *region = vgpu->region[i].data;
506         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
507
508         if (pos < region->vfio_edid_regs.edid_offset) {
509                 ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
510         } else {
511                 pos -= EDID_BLOB_OFFSET;
512                 ret = handle_edid_blob(region, buf, count, pos, iswrite);
513         }
514
515         if (ret < 0)
516                 gvt_vgpu_err("failed to access EDID region\n");
517
518         return ret;
519 }
520
521 static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
522                                         struct vfio_region *region)
523 {
524         kfree(region->data);
525 }
526
527 static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
528         .rw = intel_vgpu_reg_rw_edid,
529         .release = intel_vgpu_reg_release_edid,
530 };
531
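/*
 * Grow the vgpu's region array by one and describe a device-specific VFIO
 * region, reported to userspace after the standard vfio-pci regions.
 */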
532 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
533                 unsigned int type, unsigned int subtype,
534                 const struct intel_vgpu_regops *ops,
535                 size_t size, u32 flags, void *data)
536 {
537         struct vfio_region *region;
538
539         region = krealloc(vgpu->region,
540                         (vgpu->num_regions + 1) * sizeof(*region),
541                         GFP_KERNEL);
542         if (!region)
543                 return -ENOMEM;
544
545         vgpu->region = region;
546         vgpu->region[vgpu->num_regions].type = type;
547         vgpu->region[vgpu->num_regions].subtype = subtype;
548         vgpu->region[vgpu->num_regions].ops = ops;
549         vgpu->region[vgpu->num_regions].size = size;
550         vgpu->region[vgpu->num_regions].flags = flags;
551         vgpu->region[vgpu->num_regions].data = data;
552         vgpu->num_regions++;
553         return 0;
554 }
555
556 int intel_gvt_set_opregion(struct intel_vgpu *vgpu)
557 {
558         void *base;
559         int ret;
560
561         /* Each vgpu has its own opregion, although VFIO will create another
562          * one later. This one is used to expose the opregion to VFIO, while
563          * the one VFIO creates later is what the guest actually uses.
564          */
565         base = vgpu_opregion(vgpu)->va;
566         if (!base)
567                 return -ENOMEM;
568
569         if (memcmp(base, OPREGION_SIGNATURE, 16)) {
570                 memunmap(base);
571                 return -EINVAL;
572         }
573
574         ret = intel_vgpu_register_reg(vgpu,
575                         PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
576                         VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
577                         &intel_vgpu_regops_opregion, OPREGION_SIZE,
578                         VFIO_REGION_INFO_FLAG_READ, base);
579
580         return ret;
581 }
582
583 int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
584 {
585         struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
586         struct vfio_edid_region *base;
587         int ret;
588
589         base = kzalloc(sizeof(*base), GFP_KERNEL);
590         if (!base)
591                 return -ENOMEM;
592
593         /* TODO: Add multi-port and EDID extension block support */
594         base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
595         base->vfio_edid_regs.edid_max_size = EDID_SIZE;
596         base->vfio_edid_regs.edid_size = EDID_SIZE;
597         base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
598         base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
599         base->edid_blob = port->edid->edid_block;
600
601         ret = intel_vgpu_register_reg(vgpu,
602                         VFIO_REGION_TYPE_GFX,
603                         VFIO_REGION_SUBTYPE_GFX_EDID,
604                         &intel_vgpu_regops_edid, EDID_SIZE,
605                         VFIO_REGION_INFO_FLAG_READ |
606                         VFIO_REGION_INFO_FLAG_WRITE |
607                         VFIO_REGION_INFO_FLAG_CAPS, base);
608
609         return ret;
610 }
611
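/*
 * vfio_device_ops.dma_unmap callback: a range of guest IOVA space is going
 * away, so unmap and drop every cached entry that falls inside it.
 */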
612 static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
613                                  u64 length)
614 {
615         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
616         struct gvt_dma *entry;
617         u64 iov_pfn = iova >> PAGE_SHIFT;
618         u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
619
620         mutex_lock(&vgpu->cache_lock);
621         for (; iov_pfn < end_iov_pfn; iov_pfn++) {
622                 entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
623                 if (!entry)
624                         continue;
625
626                 gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
627                                    entry->size);
628                 __gvt_cache_remove_entry(vgpu, entry);
629         }
630         mutex_unlock(&vgpu->cache_lock);
631 }
632
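/*
 * Return true if another already-attached vgpu uses the same KVM instance;
 * intel_vgpu_open_device() uses this to refuse a second vgpu per VM.
 */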
633 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
634 {
635         struct intel_vgpu *itr;
636         int id;
637         bool ret = false;
638
639         mutex_lock(&vgpu->gvt->lock);
640         for_each_active_vgpu(vgpu->gvt, itr, id) {
641                 if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status))
642                         continue;
643
644                 if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
645                         ret = true;
646                         goto out;
647                 }
648         }
649 out:
650         mutex_unlock(&vgpu->gvt->lock);
651         return ret;
652 }
653
654 static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
655 {
656         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
657
658         if (!vgpu->vfio_device.kvm ||
659             vgpu->vfio_device.kvm->mm != current->mm) {
660                 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
661                 return -ESRCH;
662         }
663
664         if (__kvmgt_vgpu_exist(vgpu))
665                 return -EEXIST;
666
667         vgpu->track_node.track_write = kvmgt_page_track_write;
668         vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
669         kvm_get_kvm(vgpu->vfio_device.kvm);
670         kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
671                                          &vgpu->track_node);
672
673         set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
674
675         debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
676                              &vgpu->nr_cache_entries);
677
678         intel_gvt_activate_vgpu(vgpu);
679
680         return 0;
681 }
682
683 static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
684 {
685         struct eventfd_ctx *trigger;
686
687         trigger = vgpu->msi_trigger;
688         if (trigger) {
689                 eventfd_ctx_put(trigger);
690                 vgpu->msi_trigger = NULL;
691         }
692 }
693
694 static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
695 {
696         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
697
698         intel_gvt_release_vgpu(vgpu);
699
700         clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
701
702         debugfs_lookup_and_remove(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs);
703
704         kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
705                                            &vgpu->track_node);
706         kvm_put_kvm(vgpu->vfio_device.kvm);
707
708         kvmgt_protect_table_destroy(vgpu);
709         gvt_cache_destroy(vgpu);
710
711         WARN_ON(vgpu->nr_cache_entries);
712
713         vgpu->gfn_cache = RB_ROOT;
714         vgpu->dma_addr_cache = RB_ROOT;
715
716         intel_vgpu_release_msi_eventfd_ctx(vgpu);
717 }
718
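/*
 * Read the guest-programmed base address of a BAR from the virtual config
 * space, folding in the high dword for 64-bit memory BARs.
 */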
719 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
720 {
721         u32 start_lo, start_hi;
722         u32 mem_type;
723
724         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
725                         PCI_BASE_ADDRESS_MEM_MASK;
726         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
727                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
728
729         switch (mem_type) {
730         case PCI_BASE_ADDRESS_MEM_TYPE_64:
731                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
732                                                 + bar + 4));
733                 break;
734         case PCI_BASE_ADDRESS_MEM_TYPE_32:
735         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
736                 /* A 1M memory BAR is treated as a 32-bit BAR */
737         default:
738                 /* An unknown memory type is treated as a 32-bit BAR */
739                 start_hi = 0;
740                 break;
741         }
742
743         return ((u64)start_hi << 32) | start_lo;
744 }
745
746 static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
747                              void *buf, unsigned int count, bool is_write)
748 {
749         u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
750         int ret;
751
752         if (is_write)
753                 ret = intel_vgpu_emulate_mmio_write(vgpu,
754                                         bar_start + off, buf, count);
755         else
756                 ret = intel_vgpu_emulate_mmio_read(vgpu,
757                                         bar_start + off, buf, count);
758         return ret;
759 }
760
761 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
762 {
763         return off >= vgpu_aperture_offset(vgpu) &&
764                off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
765 }
766
767 static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
768                 void *buf, unsigned long count, bool is_write)
769 {
770         void __iomem *aperture_va;
771
772         if (!intel_vgpu_in_aperture(vgpu, off) ||
773             !intel_vgpu_in_aperture(vgpu, off + count)) {
774                 gvt_vgpu_err("Invalid aperture offset %llu\n", off);
775                 return -EINVAL;
776         }
777
778         aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
779                                         ALIGN_DOWN(off, PAGE_SIZE),
780                                         count + offset_in_page(off));
781         if (!aperture_va)
782                 return -EIO;
783
784         if (is_write)
785                 memcpy_toio(aperture_va + offset_in_page(off), buf, count);
786         else
787                 memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
788
789         io_mapping_unmap(aperture_va);
790
791         return 0;
792 }
793
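/*
 * Common read/write dispatcher: decode the VFIO offset into a region index
 * and forward the access to config space emulation, BAR0 MMIO emulation,
 * the BAR2 aperture or a device-specific region handler.
 */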
794 static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf,
795                         size_t count, loff_t *ppos, bool is_write)
796 {
797         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
798         u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
799         int ret = -EINVAL;
800
802         if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) {
803                 gvt_vgpu_err("invalid index: %u\n", index);
804                 return -EINVAL;
805         }
806
807         switch (index) {
808         case VFIO_PCI_CONFIG_REGION_INDEX:
809                 if (is_write)
810                         ret = intel_vgpu_emulate_cfg_write(vgpu, pos,
811                                                 buf, count);
812                 else
813                         ret = intel_vgpu_emulate_cfg_read(vgpu, pos,
814                                                 buf, count);
815                 break;
816         case VFIO_PCI_BAR0_REGION_INDEX:
817                 ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
818                                         buf, count, is_write);
819                 break;
820         case VFIO_PCI_BAR2_REGION_INDEX:
821                 ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
822                 break;
823         case VFIO_PCI_BAR1_REGION_INDEX:
824         case VFIO_PCI_BAR3_REGION_INDEX:
825         case VFIO_PCI_BAR4_REGION_INDEX:
826         case VFIO_PCI_BAR5_REGION_INDEX:
827         case VFIO_PCI_VGA_REGION_INDEX:
828         case VFIO_PCI_ROM_REGION_INDEX:
829                 break;
830         default:
831                 if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions)
832                         return -EINVAL;
833
834                 index -= VFIO_PCI_NUM_REGIONS;
835                 return vgpu->region[index].ops->rw(vgpu, buf, count,
836                                 ppos, is_write);
837         }
838
839         return ret == 0 ? count : ret;
840 }
841
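/*
 * Return true when the access at *ppos lands on a GGTT entry inside the
 * BAR0 MMIO region; intel_vgpu_read()/intel_vgpu_write() use this to allow
 * full 8-byte accesses.
 */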
842 static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos)
843 {
844         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
845         struct intel_gvt *gvt = vgpu->gvt;
846         int offset;
847
848         /* Only allow MMIO GGTT entry access */
849         if (index != VFIO_PCI_BAR0_REGION_INDEX)
850                 return false;
851
852         offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
853                 intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
854
855         return offset >= gvt->device_info.gtt_start_offset &&
856                offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt);
858 }
859
860 static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
861                         size_t count, loff_t *ppos)
862 {
863         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
864         unsigned int done = 0;
865         int ret;
866
867         while (count) {
868                 size_t filled;
869
870                 /* 8-byte reads are only supported for GGTT entries */
871                 if (count >= 8 && !(*ppos % 8) &&
872                         gtt_entry(vgpu, ppos)) {
873                         u64 val;
874
875                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
876                                         ppos, false);
877                         if (ret <= 0)
878                                 goto read_err;
879
880                         if (copy_to_user(buf, &val, sizeof(val)))
881                                 goto read_err;
882
883                         filled = 8;
884                 } else if (count >= 4 && !(*ppos % 4)) {
885                         u32 val;
886
887                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
888                                         ppos, false);
889                         if (ret <= 0)
890                                 goto read_err;
891
892                         if (copy_to_user(buf, &val, sizeof(val)))
893                                 goto read_err;
894
895                         filled = 4;
896                 } else if (count >= 2 && !(*ppos % 2)) {
897                         u16 val;
898
899                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
900                                         ppos, false);
901                         if (ret <= 0)
902                                 goto read_err;
903
904                         if (copy_to_user(buf, &val, sizeof(val)))
905                                 goto read_err;
906
907                         filled = 2;
908                 } else {
909                         u8 val;
910
911                         ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
912                                         false);
913                         if (ret <= 0)
914                                 goto read_err;
915
916                         if (copy_to_user(buf, &val, sizeof(val)))
917                                 goto read_err;
918
919                         filled = 1;
920                 }
921
922                 count -= filled;
923                 done += filled;
924                 *ppos += filled;
925                 buf += filled;
926         }
927
928         return done;
929
930 read_err:
931         return -EFAULT;
932 }
933
934 static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
935                                 const char __user *buf,
936                                 size_t count, loff_t *ppos)
937 {
938         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
939         unsigned int done = 0;
940         int ret;
941
942         while (count) {
943                 size_t filled;
944
945                 /* 8-byte writes are only supported for GGTT entries */
946                 if (count >= 8 && !(*ppos % 8) &&
947                         gtt_entry(vgpu, ppos)) {
948                         u64 val;
949
950                         if (copy_from_user(&val, buf, sizeof(val)))
951                                 goto write_err;
952
953                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
954                                         ppos, true);
955                         if (ret <= 0)
956                                 goto write_err;
957
958                         filled = 8;
959                 } else if (count >= 4 && !(*ppos % 4)) {
960                         u32 val;
961
962                         if (copy_from_user(&val, buf, sizeof(val)))
963                                 goto write_err;
964
965                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
966                                         ppos, true);
967                         if (ret <= 0)
968                                 goto write_err;
969
970                         filled = 4;
971                 } else if (count >= 2 && !(*ppos % 2)) {
972                         u16 val;
973
974                         if (copy_from_user(&val, buf, sizeof(val)))
975                                 goto write_err;
976
977                         ret = intel_vgpu_rw(vgpu, (char *)&val,
978                                         sizeof(val), ppos, true);
979                         if (ret <= 0)
980                                 goto write_err;
981
982                         filled = 2;
983                 } else {
984                         u8 val;
985
986                         if (copy_from_user(&val, buf, sizeof(val)))
987                                 goto write_err;
988
989                         ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
990                                         ppos, true);
991                         if (ret <= 0)
992                                 goto write_err;
993
994                         filled = 1;
995                 }
996
997                 count -= filled;
998                 done += filled;
999                 *ppos += filled;
1000                 buf += filled;
1001         }
1002
1003         return done;
1004 write_err:
1005         return -EFAULT;
1006 }
1007
1008 static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
1009                 struct vm_area_struct *vma)
1010 {
1011         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1012         unsigned int index;
1013         u64 virtaddr;
1014         unsigned long req_size, pgoff, req_start;
1015         pgprot_t pg_prot;
1016
1017         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1018         if (index >= VFIO_PCI_ROM_REGION_INDEX)
1019                 return -EINVAL;
1020
1021         if (vma->vm_end < vma->vm_start)
1022                 return -EINVAL;
1023         if ((vma->vm_flags & VM_SHARED) == 0)
1024                 return -EINVAL;
1025         if (index != VFIO_PCI_BAR2_REGION_INDEX)
1026                 return -EINVAL;
1027
1028         pg_prot = vma->vm_page_prot;
1029         virtaddr = vma->vm_start;
1030         req_size = vma->vm_end - vma->vm_start;
1031         pgoff = vma->vm_pgoff &
1032                 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1033         req_start = pgoff << PAGE_SHIFT;
1034
1035         if (!intel_vgpu_in_aperture(vgpu, req_start))
1036                 return -EINVAL;
1037         if (req_start + req_size >
1038             vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1039                 return -EINVAL;
1040
1041         pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1042
1043         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1044 }
1045
1046 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1047 {
1048         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1049                 return 1;
1050
1051         return 0;
1052 }
1053
1054 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1055                         unsigned int index, unsigned int start,
1056                         unsigned int count, u32 flags,
1057                         void *data)
1058 {
1059         return 0;
1060 }
1061
1062 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1063                         unsigned int index, unsigned int start,
1064                         unsigned int count, u32 flags, void *data)
1065 {
1066         return 0;
1067 }
1068
1069 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1070                 unsigned int index, unsigned int start, unsigned int count,
1071                 u32 flags, void *data)
1072 {
1073         return 0;
1074 }
1075
1076 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1077                 unsigned int index, unsigned int start, unsigned int count,
1078                 u32 flags, void *data)
1079 {
1080         struct eventfd_ctx *trigger;
1081
1082         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1083                 int fd = *(int *)data;
1084
1085                 trigger = eventfd_ctx_fdget(fd);
1086                 if (IS_ERR(trigger)) {
1087                         gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1088                         return PTR_ERR(trigger);
1089                 }
1090                 vgpu->msi_trigger = trigger;
1091         } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1092                 intel_vgpu_release_msi_eventfd_ctx(vgpu);
1093
1094         return 0;
1095 }
1096
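/*
 * Dispatch VFIO_DEVICE_SET_IRQS to the handler matching the IRQ index
 * (INTx or MSI) and the action encoded in 'flags'.
 */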
1097 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1098                 unsigned int index, unsigned int start, unsigned int count,
1099                 void *data)
1100 {
1101         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1102                         unsigned int start, unsigned int count, u32 flags,
1103                         void *data) = NULL;
1104
1105         switch (index) {
1106         case VFIO_PCI_INTX_IRQ_INDEX:
1107                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1108                 case VFIO_IRQ_SET_ACTION_MASK:
1109                         func = intel_vgpu_set_intx_mask;
1110                         break;
1111                 case VFIO_IRQ_SET_ACTION_UNMASK:
1112                         func = intel_vgpu_set_intx_unmask;
1113                         break;
1114                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1115                         func = intel_vgpu_set_intx_trigger;
1116                         break;
1117                 }
1118                 break;
1119         case VFIO_PCI_MSI_IRQ_INDEX:
1120                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1121                 case VFIO_IRQ_SET_ACTION_MASK:
1122                 case VFIO_IRQ_SET_ACTION_UNMASK:
1123                         /* XXX Need masking support exported */
1124                         break;
1125                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1126                         func = intel_vgpu_set_msi_trigger;
1127                         break;
1128                 }
1129                 break;
1130         }
1131
1132         if (!func)
1133                 return -ENOTTY;
1134
1135         return func(vgpu, index, start, count, flags, data);
1136 }
1137
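/*
 * vfio_device_ops.ioctl: implements the vfio-pci compatible device ioctls
 * (GET_INFO, GET_REGION_INFO, GET_IRQ_INFO, SET_IRQS, RESET) plus the GFX
 * plane query and dmabuf ioctls.
 */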
1138 static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
1139                              unsigned long arg)
1140 {
1141         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1142         unsigned long minsz;
1143
1144         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1145
1146         if (cmd == VFIO_DEVICE_GET_INFO) {
1147                 struct vfio_device_info info;
1148
1149                 minsz = offsetofend(struct vfio_device_info, num_irqs);
1150
1151                 if (copy_from_user(&info, (void __user *)arg, minsz))
1152                         return -EFAULT;
1153
1154                 if (info.argsz < minsz)
1155                         return -EINVAL;
1156
1157                 info.flags = VFIO_DEVICE_FLAGS_PCI;
1158                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
1159                 info.num_regions = VFIO_PCI_NUM_REGIONS +
1160                                 vgpu->num_regions;
1161                 info.num_irqs = VFIO_PCI_NUM_IRQS;
1162
1163                 return copy_to_user((void __user *)arg, &info, minsz) ?
1164                         -EFAULT : 0;
1165
1166         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1167                 struct vfio_region_info info;
1168                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1169                 unsigned int i;
1170                 int ret;
1171                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1172                 int nr_areas = 1;
1173                 int cap_type_id;
1174
1175                 minsz = offsetofend(struct vfio_region_info, offset);
1176
1177                 if (copy_from_user(&info, (void __user *)arg, minsz))
1178                         return -EFAULT;
1179
1180                 if (info.argsz < minsz)
1181                         return -EINVAL;
1182
1183                 switch (info.index) {
1184                 case VFIO_PCI_CONFIG_REGION_INDEX:
1185                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1186                         info.size = vgpu->gvt->device_info.cfg_space_size;
1187                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1188                                      VFIO_REGION_INFO_FLAG_WRITE;
1189                         break;
1190                 case VFIO_PCI_BAR0_REGION_INDEX:
1191                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1192                         info.size = vgpu->cfg_space.bar[info.index].size;
1193                         if (!info.size) {
1194                                 info.flags = 0;
1195                                 break;
1196                         }
1197
1198                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1199                                      VFIO_REGION_INFO_FLAG_WRITE;
1200                         break;
1201                 case VFIO_PCI_BAR1_REGION_INDEX:
1202                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1203                         info.size = 0;
1204                         info.flags = 0;
1205                         break;
1206                 case VFIO_PCI_BAR2_REGION_INDEX:
1207                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1208                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1209                                         VFIO_REGION_INFO_FLAG_MMAP |
1210                                         VFIO_REGION_INFO_FLAG_READ |
1211                                         VFIO_REGION_INFO_FLAG_WRITE;
1212                         info.size = gvt_aperture_sz(vgpu->gvt);
1213
1214                         sparse = kzalloc(struct_size(sparse, areas, nr_areas),
1215                                          GFP_KERNEL);
1216                         if (!sparse)
1217                                 return -ENOMEM;
1218
1219                         sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1220                         sparse->header.version = 1;
1221                         sparse->nr_areas = nr_areas;
1222                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1223                         sparse->areas[0].offset =
1224                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1225                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1226                         break;
1227
1228                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1229                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1230                         info.size = 0;
1231                         info.flags = 0;
1232
1233                         gvt_dbg_core("get region info bar:%d\n", info.index);
1234                         break;
1235
1236                 case VFIO_PCI_ROM_REGION_INDEX:
1237                 case VFIO_PCI_VGA_REGION_INDEX:
1238                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1239                         info.size = 0;
1240                         info.flags = 0;
1241
1242                         gvt_dbg_core("get region info index:%d\n", info.index);
1243                         break;
1244                 default:
1245                         {
1246                                 struct vfio_region_info_cap_type cap_type = {
1247                                         .header.id = VFIO_REGION_INFO_CAP_TYPE,
1248                                         .header.version = 1 };
1249
1250                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
1251                                                 vgpu->num_regions)
1252                                         return -EINVAL;
1253                                 info.index =
1254                                         array_index_nospec(info.index,
1255                                                         VFIO_PCI_NUM_REGIONS +
1256                                                         vgpu->num_regions);
1257
1258                                 i = info.index - VFIO_PCI_NUM_REGIONS;
1259
1260                                 info.offset =
1261                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
1262                                 info.size = vgpu->region[i].size;
1263                                 info.flags = vgpu->region[i].flags;
1264
1265                                 cap_type.type = vgpu->region[i].type;
1266                                 cap_type.subtype = vgpu->region[i].subtype;
1267
1268                                 ret = vfio_info_add_capability(&caps,
1269                                                         &cap_type.header,
1270                                                         sizeof(cap_type));
1271                                 if (ret)
1272                                         return ret;
1273                         }
1274                 }
1275
1276                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1277                         switch (cap_type_id) {
1278                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1279                                 ret = vfio_info_add_capability(&caps,
1280                                         &sparse->header,
1281                                         struct_size(sparse, areas,
1282                                                     sparse->nr_areas));
1283                                 if (ret) {
1284                                         kfree(sparse);
1285                                         return ret;
1286                                 }
1287                                 break;
1288                         default:
1289                                 kfree(sparse);
1290                                 return -EINVAL;
1291                         }
1292                 }
1293
1294                 if (caps.size) {
1295                         info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1296                         if (info.argsz < sizeof(info) + caps.size) {
1297                                 info.argsz = sizeof(info) + caps.size;
1298                                 info.cap_offset = 0;
1299                         } else {
1300                                 vfio_info_cap_shift(&caps, sizeof(info));
1301                                 if (copy_to_user((void __user *)arg +
1302                                                   sizeof(info), caps.buf,
1303                                                   caps.size)) {
1304                                         kfree(caps.buf);
1305                                         kfree(sparse);
1306                                         return -EFAULT;
1307                                 }
1308                                 info.cap_offset = sizeof(info);
1309                         }
1310
1311                         kfree(caps.buf);
1312                 }
1313
1314                 kfree(sparse);
1315                 return copy_to_user((void __user *)arg, &info, minsz) ?
1316                         -EFAULT : 0;
1317         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1318                 struct vfio_irq_info info;
1319
1320                 minsz = offsetofend(struct vfio_irq_info, count);
1321
1322                 if (copy_from_user(&info, (void __user *)arg, minsz))
1323                         return -EFAULT;
1324
1325                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1326                         return -EINVAL;
1327
1328                 switch (info.index) {
1329                 case VFIO_PCI_INTX_IRQ_INDEX:
1330                 case VFIO_PCI_MSI_IRQ_INDEX:
1331                         break;
1332                 default:
1333                         return -EINVAL;
1334                 }
1335
1336                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1337
1338                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1339
1340                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1341                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1342                                        VFIO_IRQ_INFO_AUTOMASKED);
1343                 else
1344                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1345
1346                 return copy_to_user((void __user *)arg, &info, minsz) ?
1347                         -EFAULT : 0;
1348         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1349                 struct vfio_irq_set hdr;
1350                 u8 *data = NULL;
1351                 int ret = 0;
1352                 size_t data_size = 0;
1353
1354                 minsz = offsetofend(struct vfio_irq_set, count);
1355
1356                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1357                         return -EFAULT;
1358
1359                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1360                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1361
1362                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1363                                                 VFIO_PCI_NUM_IRQS, &data_size);
1364                         if (ret) {
1365                                 gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1366                                 return -EINVAL;
1367                         }
1368                         if (data_size) {
1369                                 data = memdup_user((void __user *)(arg + minsz),
1370                                                    data_size);
1371                                 if (IS_ERR(data))
1372                                         return PTR_ERR(data);
1373                         }
1374                 }
1375
1376                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1377                                         hdr.start, hdr.count, data);
1378                 kfree(data);
1379
1380                 return ret;
1381         } else if (cmd == VFIO_DEVICE_RESET) {
1382                 intel_gvt_reset_vgpu(vgpu);
1383                 return 0;
1384         } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1385                 struct vfio_device_gfx_plane_info dmabuf;
1386                 int ret = 0;
1387
1388                 minsz = offsetofend(struct vfio_device_gfx_plane_info,
1389                                     dmabuf_id);
1390                 if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1391                         return -EFAULT;
1392                 if (dmabuf.argsz < minsz)
1393                         return -EINVAL;
1394
1395                 ret = intel_vgpu_query_plane(vgpu, &dmabuf);
1396                 if (ret != 0)
1397                         return ret;
1398
1399                 return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1400                                                                 -EFAULT : 0;
1401         } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1402                 __u32 dmabuf_id;
1403
1404                 if (get_user(dmabuf_id, (__u32 __user *)arg))
1405                         return -EFAULT;
1406                 return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
1407         }
1408
1409         return -ENOTTY;
1410 }
1411
1412 static ssize_t
1413 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1414              char *buf)
1415 {
1416         struct intel_vgpu *vgpu = dev_get_drvdata(dev);
1417
1418         return sprintf(buf, "%d\n", vgpu->id);
1419 }
1420
1421 static DEVICE_ATTR_RO(vgpu_id);
1422
1423 static struct attribute *intel_vgpu_attrs[] = {
1424         &dev_attr_vgpu_id.attr,
1425         NULL
1426 };
1427
1428 static const struct attribute_group intel_vgpu_group = {
1429         .name = "intel_vgpu",
1430         .attrs = intel_vgpu_attrs,
1431 };
1432
1433 static const struct attribute_group *intel_vgpu_groups[] = {
1434         &intel_vgpu_group,
1435         NULL,
1436 };
1437
1438 static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
1439 {
1440         struct mdev_device *mdev = to_mdev_device(vfio_dev->dev);
1441         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1442         struct intel_vgpu_type *type =
1443                 container_of(mdev->type, struct intel_vgpu_type, type);
1444         int ret;
1445
1446         vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
1447         ret = intel_gvt_create_vgpu(vgpu, type->conf);
1448         if (ret)
1449                 return ret;
1450
1451         kvmgt_protect_table_init(vgpu);
1452         gvt_cache_init(vgpu);
1453
1454         return 0;
1455 }
1456
1457 static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
1458 {
1459         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1460
1461         intel_gvt_destroy_vgpu(vgpu);
1462 }
1463
1464 static const struct vfio_device_ops intel_vgpu_dev_ops = {
1465         .init           = intel_vgpu_init_dev,
1466         .release        = intel_vgpu_release_dev,
1467         .open_device    = intel_vgpu_open_device,
1468         .close_device   = intel_vgpu_close_device,
1469         .read           = intel_vgpu_read,
1470         .write          = intel_vgpu_write,
1471         .mmap           = intel_vgpu_mmap,
1472         .ioctl          = intel_vgpu_ioctl,
1473         .dma_unmap      = intel_vgpu_dma_unmap,
1474         .bind_iommufd   = vfio_iommufd_emulated_bind,
1475         .unbind_iommufd = vfio_iommufd_emulated_unbind,
1476         .attach_ioas    = vfio_iommufd_emulated_attach_ioas,
1477         .detach_ioas    = vfio_iommufd_emulated_detach_ioas,
1478 };
1479
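/*
 * mdev probe/remove: allocate the combined intel_vgpu + vfio_device
 * object and register it as an emulated-IOMMU vfio device; the init and
 * release callbacks above do the GVT-specific setup and teardown.
 */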
1480 static int intel_vgpu_probe(struct mdev_device *mdev)
1481 {
1482         struct intel_vgpu *vgpu;
1483         int ret;
1484
1485         vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev,
1486                                  &intel_vgpu_dev_ops);
1487         if (IS_ERR(vgpu)) {
1488                 gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
1489                 return PTR_ERR(vgpu);
1490         }
1491
1492         dev_set_drvdata(&mdev->dev, vgpu);
1493         ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
1494         if (ret)
1495                 goto out_put_vdev;
1496
1497         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
1498                      dev_name(mdev_dev(mdev)));
1499         return 0;
1500
1501 out_put_vdev:
1502         vfio_put_device(&vgpu->vfio_device);
1503         return ret;
1504 }
1505
1506 static void intel_vgpu_remove(struct mdev_device *mdev)
1507 {
1508         struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);
1509
1510         vfio_unregister_group_dev(&vgpu->vfio_device);
1511         vfio_put_device(&vgpu->vfio_device);
1512 }
1513
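/*
 * Report how many more instances of this vGPU type can be created: the
 * remaining low/high graphics memory and fence registers, each divided by
 * the per-instance requirement, with the smallest quotient winning.
 */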
1514 static unsigned int intel_vgpu_get_available(struct mdev_type *mtype)
1515 {
1516         struct intel_vgpu_type *type =
1517                 container_of(mtype, struct intel_vgpu_type, type);
1518         struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt;
1519         unsigned int low_gm_avail, high_gm_avail, fence_avail;
1520
1521         mutex_lock(&gvt->lock);
1522         low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
1523                 gvt->gm.vgpu_allocated_low_gm_size;
1524         high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
1525                 gvt->gm.vgpu_allocated_high_gm_size;
1526         fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
1527                 gvt->fence.vgpu_allocated_fence_num;
1528         mutex_unlock(&gvt->lock);
1529
1530         return min3(low_gm_avail / type->conf->low_mm,
1531                     high_gm_avail / type->conf->high_mm,
1532                     fence_avail / type->conf->fence);
1533 }
1534
1535 static struct mdev_driver intel_vgpu_mdev_driver = {
1536         .device_api     = VFIO_DEVICE_API_PCI_STRING,
1537         .driver = {
1538                 .name           = "intel_vgpu_mdev",
1539                 .owner          = THIS_MODULE,
1540                 .dev_groups     = intel_vgpu_groups,
1541         },
1542         .probe                  = intel_vgpu_probe,
1543         .remove                 = intel_vgpu_remove,
1544         .get_available          = intel_vgpu_get_available,
1545         .show_description       = intel_vgpu_show_description,
1546 };
1547
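/*
 * Write-protect a guest page through KVM's page-track API so that guest
 * writes to it (e.g. to a guest page table that GVT shadows) are reported
 * via kvmgt_page_track_write().  The srcu read lock protects the memslot
 * lookup and kvm->mmu_lock serialises the tracking update.
 */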
1548 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
1549 {
1550         struct kvm *kvm = info->vfio_device.kvm;
1551         struct kvm_memory_slot *slot;
1552         int idx;
1553
1554         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
1555                 return -ESRCH;
1556
1557         idx = srcu_read_lock(&kvm->srcu);
1558         slot = gfn_to_memslot(kvm, gfn);
1559         if (!slot) {
1560                 srcu_read_unlock(&kvm->srcu, idx);
1561                 return -EINVAL;
1562         }
1563
1564         write_lock(&kvm->mmu_lock);
1565
1566         if (kvmgt_gfn_is_write_protected(info, gfn))
1567                 goto out;
1568
1569         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1570         kvmgt_protect_table_add(info, gfn);
1571
1572 out:
1573         write_unlock(&kvm->mmu_lock);
1574         srcu_read_unlock(&kvm->srcu, idx);
1575         return 0;
1576 }
1577
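/* Undo intel_gvt_page_track_add(): stop write-protecting the gfn. */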
1578 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
1579 {
1580         struct kvm *kvm = info->vfio_device.kvm;
1581         struct kvm_memory_slot *slot;
1582         int idx;
1583
1584         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
1585                 return -ESRCH;
1586
1587         idx = srcu_read_lock(&kvm->srcu);
1588         slot = gfn_to_memslot(kvm, gfn);
1589         if (!slot) {
1590                 srcu_read_unlock(&kvm->srcu, idx);
1591                 return -EINVAL;
1592         }
1593
1594         write_lock(&kvm->mmu_lock);
1595
1596         if (!kvmgt_gfn_is_write_protected(info, gfn))
1597                 goto out;
1598
1599         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1600         kvmgt_protect_table_del(info, gfn);
1601
1602 out:
1603         write_unlock(&kvm->mmu_lock);
1604         srcu_read_unlock(&kvm->srcu, idx);
1605         return 0;
1606 }
1607
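/*
 * KVM page-track write notifier: forward emulated guest writes that hit a
 * write-protected gfn to the GVT page-track handler.
 */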
1608 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1609                 const u8 *val, int len,
1610                 struct kvm_page_track_notifier_node *node)
1611 {
1612         struct intel_vgpu *info =
1613                 container_of(node, struct intel_vgpu, track_node);
1614
1615         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1616                 intel_vgpu_page_track_handler(info, gpa,
1617                                                      (void *)val, len);
1618 }
1619
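/*
 * Memslot flush notifier: when KVM drops or moves a memslot, remove the
 * write protection for every gfn in it that this vGPU was tracking.
 */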
1620 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1621                 struct kvm_memory_slot *slot,
1622                 struct kvm_page_track_notifier_node *node)
1623 {
1624         int i;
1625         gfn_t gfn;
1626         struct intel_vgpu *info =
1627                 container_of(node, struct intel_vgpu, track_node);
1628
1629         write_lock(&kvm->mmu_lock);
1630         for (i = 0; i < slot->npages; i++) {
1631                 gfn = slot->base_gfn + i;
1632                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1633                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1634                                                 KVM_PAGE_TRACK_WRITE);
1635                         kvmgt_protect_table_del(info, gfn);
1636                 }
1637         }
1638         write_unlock(&kvm->mmu_lock);
1639 }
1640
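/* Release the dynamically added VFIO device regions (e.g. OpRegion, EDID). */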
1641 void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
1642 {
1643         int i;
1644
1645         if (!vgpu->region)
1646                 return;
1647
1648         for (i = 0; i < vgpu->num_regions; i++)
1649                 if (vgpu->region[i].ops->release)
1650                         vgpu->region[i].ops->release(vgpu,
1651                                         &vgpu->region[i]);
1652         vgpu->num_regions = 0;
1653         kfree(vgpu->region);
1654         vgpu->region = NULL;
1655 }
1656
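/*
 * Map a guest page for DMA and cache the result.  A repeated request for
 * the same gfn just takes another reference on the cached entry; a request
 * with a different size drops the old mapping and creates a new one.
 */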
1657 int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
1658                 unsigned long size, dma_addr_t *dma_addr)
1659 {
1660         struct gvt_dma *entry;
1661         int ret;
1662
1663         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1664                 return -EINVAL;
1665
1666         mutex_lock(&vgpu->cache_lock);
1667
1668         entry = __gvt_cache_find_gfn(vgpu, gfn);
1669         if (!entry) {
1670                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1671                 if (ret)
1672                         goto err_unlock;
1673
1674                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1675                 if (ret)
1676                         goto err_unmap;
1677         } else if (entry->size != size) {
1678                 /* the same gfn mapped with a different size: unmap and re-map */
1679                 gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
1680                 __gvt_cache_remove_entry(vgpu, entry);
1681
1682                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1683                 if (ret)
1684                         goto err_unlock;
1685
1686                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1687                 if (ret)
1688                         goto err_unmap;
1689         } else {
1690                 kref_get(&entry->ref);
1691                 *dma_addr = entry->dma_addr;
1692         }
1693
1694         mutex_unlock(&vgpu->cache_lock);
1695         return 0;
1696
1697 err_unmap:
1698         gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
1699 err_unlock:
1700         mutex_unlock(&vgpu->cache_lock);
1701         return ret;
1702 }
1703
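/*
 * Take an extra reference on an existing DMA mapping, looked up by its
 * dma_addr, so that it stays mapped while still in use.
 */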
1704 int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
1705 {
1706         struct gvt_dma *entry;
1707         int ret = 0;
1708
1709         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1710                 return -EINVAL;
1711
1712         mutex_lock(&vgpu->cache_lock);
1713         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1714         if (entry)
1715                 kref_get(&entry->ref);
1716         else
1717                 ret = -ENOMEM;
1718         mutex_unlock(&vgpu->cache_lock);
1719
1720         return ret;
1721 }
1722
1723 static void __gvt_dma_release(struct kref *ref)
1724 {
1725         struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
1726
1727         gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
1728                            entry->size);
1729         __gvt_cache_remove_entry(entry->vgpu, entry);
1730 }
1731
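/* Drop one reference on a cached mapping; the last put unmaps the page. */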
1732 void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
1733                 dma_addr_t dma_addr)
1734 {
1735         struct gvt_dma *entry;
1736
1737         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1738                 return;
1739
1740         mutex_lock(&vgpu->cache_lock);
1741         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1742         if (entry)
1743                 kref_put(&entry->ref, __gvt_dma_release);
1744         mutex_unlock(&vgpu->cache_lock);
1745 }
1746
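/*
 * Fixed parameters of the device model presented to every vGPU: config
 * space and MMIO sizes, GTT entry layout and start offset, and the MSI
 * capability offset taken from the physical device.
 */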
1747 static void init_device_info(struct intel_gvt *gvt)
1748 {
1749         struct intel_gvt_device_info *info = &gvt->device_info;
1750         struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
1751
1752         info->max_support_vgpus = 8;
1753         info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
1754         info->mmio_size = 2 * 1024 * 1024;
1755         info->mmio_bar = 0;
1756         info->gtt_start_offset = 8 * 1024 * 1024;
1757         info->gtt_entry_size = 8;
1758         info->gtt_entry_size_shift = 3;
1759         info->gmadr_bytes_in_cmd = 8;
1760         info->max_surface_size = 36 * 1024 * 1024;
1761         info->msi_cap_offset = pdev->msi_cap;
1762 }
1763
1764 static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
1765 {
1766         struct intel_vgpu *vgpu;
1767         int id;
1768
1769         mutex_lock(&gvt->lock);
1770         idr_for_each_entry(&gvt->vgpu_idr, vgpu, id) {
1771                 if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
1772                                        (void *)&gvt->service_request)) {
1773                         if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
1774                                 intel_vgpu_emulate_vblank(vgpu);
1775                 }
1776         }
1777         mutex_unlock(&gvt->lock);
1778 }
1779
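/*
 * The service thread sleeps until a bit in gvt->service_request is set and
 * then handles the deferred work: per-vGPU vblank emulation and scheduler
 * requests.
 */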
1780 static int gvt_service_thread(void *data)
1781 {
1782         struct intel_gvt *gvt = (struct intel_gvt *)data;
1783         int ret;
1784
1785         gvt_dbg_core("service thread start\n");
1786
1787         while (!kthread_should_stop()) {
1788                 ret = wait_event_interruptible(gvt->service_thread_wq,
1789                                 kthread_should_stop() || gvt->service_request);
1790
1791                 if (kthread_should_stop())
1792                         break;
1793
1794                 if (WARN_ONCE(ret, "service thread was woken up by a signal.\n"))
1795                         continue;
1796
1797                 intel_gvt_test_and_emulate_vblank(gvt);
1798
1799                 if (test_bit(INTEL_GVT_REQUEST_SCHED,
1800                                 (void *)&gvt->service_request) ||
1801                         test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
1802                                         (void *)&gvt->service_request)) {
1803                         intel_gvt_schedule(gvt);
1804                 }
1805         }
1806
1807         return 0;
1808 }
1809
1810 static void clean_service_thread(struct intel_gvt *gvt)
1811 {
1812         kthread_stop(gvt->service_thread);
1813 }
1814
1815 static int init_service_thread(struct intel_gvt *gvt)
1816 {
1817         init_waitqueue_head(&gvt->service_thread_wq);
1818
1819         gvt->service_thread = kthread_run(gvt_service_thread,
1820                         gvt, "gvt_service_thread");
1821         if (IS_ERR(gvt->service_thread)) {
1822                 gvt_err("failed to start service thread.\n");
1823                 return PTR_ERR(gvt->service_thread);
1824         }
1825         return 0;
1826 }
1827
1828 /**
1829  * intel_gvt_clean_device - clean a GVT device
1830  * @i915: drm i915 private data
1831  *
1832  * This function is called at the driver unloading stage to free the
1833  * resources owned by a GVT device.
1834  *
1835  */
1836 static void intel_gvt_clean_device(struct drm_i915_private *i915)
1837 {
1838         struct intel_gvt *gvt = fetch_and_zero(&i915->gvt);
1839
1840         if (drm_WARN_ON(&i915->drm, !gvt))
1841                 return;
1842
1843         mdev_unregister_parent(&gvt->parent);
1844         intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1845         intel_gvt_clean_vgpu_types(gvt);
1846
1847         intel_gvt_debugfs_clean(gvt);
1848         clean_service_thread(gvt);
1849         intel_gvt_clean_cmd_parser(gvt);
1850         intel_gvt_clean_sched_policy(gvt);
1851         intel_gvt_clean_workload_scheduler(gvt);
1852         intel_gvt_clean_gtt(gvt);
1853         intel_gvt_free_firmware(gvt);
1854         intel_gvt_clean_mmio_info(gvt);
1855         idr_destroy(&gvt->vgpu_idr);
1856
1857         kfree(i915->gvt);
1858 }
1859
1860 /**
1861  * intel_gvt_init_device - initialize a GVT device
1862  * @i915: drm i915 private data
1863  *
1864  * This function is called at the driver initialization stage to
1865  * initialize the necessary GVT components.
1866  *
1867  * Returns:
1868  * Zero on success, negative error code on failure.
1869  *
1870  */
1871 static int intel_gvt_init_device(struct drm_i915_private *i915)
1872 {
1873         struct intel_gvt *gvt;
1874         struct intel_vgpu *vgpu;
1875         int ret;
1876
1877         if (drm_WARN_ON(&i915->drm, i915->gvt))
1878                 return -EEXIST;
1879
1880         gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL);
1881         if (!gvt)
1882                 return -ENOMEM;
1883
1884         gvt_dbg_core("init gvt device\n");
1885
1886         idr_init_base(&gvt->vgpu_idr, 1);
1887         spin_lock_init(&gvt->scheduler.mmio_context_lock);
1888         mutex_init(&gvt->lock);
1889         mutex_init(&gvt->sched_lock);
1890         gvt->gt = to_gt(i915);
1891         i915->gvt = gvt;
1892
1893         init_device_info(gvt);
1894
1895         ret = intel_gvt_setup_mmio_info(gvt);
1896         if (ret)
1897                 goto out_clean_idr;
1898
1899         intel_gvt_init_engine_mmio_context(gvt);
1900
1901         ret = intel_gvt_load_firmware(gvt);
1902         if (ret)
1903                 goto out_clean_mmio_info;
1904
1905         ret = intel_gvt_init_irq(gvt);
1906         if (ret)
1907                 goto out_free_firmware;
1908
1909         ret = intel_gvt_init_gtt(gvt);
1910         if (ret)
1911                 goto out_free_firmware;
1912
1913         ret = intel_gvt_init_workload_scheduler(gvt);
1914         if (ret)
1915                 goto out_clean_gtt;
1916
1917         ret = intel_gvt_init_sched_policy(gvt);
1918         if (ret)
1919                 goto out_clean_workload_scheduler;
1920
1921         ret = intel_gvt_init_cmd_parser(gvt);
1922         if (ret)
1923                 goto out_clean_sched_policy;
1924
1925         ret = init_service_thread(gvt);
1926         if (ret)
1927                 goto out_clean_cmd_parser;
1928
1929         ret = intel_gvt_init_vgpu_types(gvt);
1930         if (ret)
1931                 goto out_clean_thread;
1932
1933         vgpu = intel_gvt_create_idle_vgpu(gvt);
1934         if (IS_ERR(vgpu)) {
1935                 ret = PTR_ERR(vgpu);
1936                 gvt_err("failed to create idle vgpu\n");
1937                 goto out_clean_types;
1938         }
1939         gvt->idle_vgpu = vgpu;
1940
1941         intel_gvt_debugfs_init(gvt);
1942
1943         ret = mdev_register_parent(&gvt->parent, i915->drm.dev,
1944                                    &intel_vgpu_mdev_driver,
1945                                    gvt->mdev_types, gvt->num_types);
1946         if (ret)
1947                 goto out_destroy_idle_vgpu;
1948
1949         gvt_dbg_core("gvt device initialization is done\n");
1950         return 0;
1951
1952 out_destroy_idle_vgpu:
1953         intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1954         intel_gvt_debugfs_clean(gvt);
1955 out_clean_types:
1956         intel_gvt_clean_vgpu_types(gvt);
1957 out_clean_thread:
1958         clean_service_thread(gvt);
1959 out_clean_cmd_parser:
1960         intel_gvt_clean_cmd_parser(gvt);
1961 out_clean_sched_policy:
1962         intel_gvt_clean_sched_policy(gvt);
1963 out_clean_workload_scheduler:
1964         intel_gvt_clean_workload_scheduler(gvt);
1965 out_clean_gtt:
1966         intel_gvt_clean_gtt(gvt);
1967 out_free_firmware:
1968         intel_gvt_free_firmware(gvt);
1969 out_clean_mmio_info:
1970         intel_gvt_clean_mmio_info(gvt);
1971 out_clean_idr:
1972         idr_destroy(&gvt->vgpu_idr);
1973         kfree(gvt);
1974         i915->gvt = NULL;
1975         return ret;
1976 }
1977
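/* Restore fence registers, vGPU MMIO state and GGTT contents on resume. */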
1978 static void intel_gvt_pm_resume(struct drm_i915_private *i915)
1979 {
1980         struct intel_gvt *gvt = i915->gvt;
1981
1982         intel_gvt_restore_fence(gvt);
1983         intel_gvt_restore_mmio(gvt);
1984         intel_gvt_restore_ggtt(gvt);
1985 }
1986
1987 static const struct intel_vgpu_ops intel_gvt_vgpu_ops = {
1988         .init_device    = intel_gvt_init_device,
1989         .clean_device   = intel_gvt_clean_device,
1990         .pm_resume      = intel_gvt_pm_resume,
1991 };
1992
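/*
 * Module entry points: hook the GVT vGPU ops into i915 first, then expose
 * the vGPU types via the mdev driver; tear down in the reverse order.
 */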
1993 static int __init kvmgt_init(void)
1994 {
1995         int ret;
1996
1997         ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops);
1998         if (ret)
1999                 return ret;
2000
2001         ret = mdev_register_driver(&intel_vgpu_mdev_driver);
2002         if (ret)
2003                 intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2004         return ret;
2005 }
2006
2007 static void __exit kvmgt_exit(void)
2008 {
2009         mdev_unregister_driver(&intel_vgpu_mdev_driver);
2010         intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2011 }
2012
2013 module_init(kvmgt_init);
2014 module_exit(kvmgt_exit);
2015
2016 MODULE_LICENSE("GPL and additional rights");
2017 MODULE_AUTHOR("Intel Corporation");