drivers/vdpa/vdpa_user/vduse_dev.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VDUSE: vDPA Device in Userspace
4  *
5  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6  *
7  * Author: Xie Yongji <xieyongji@bytedance.com>
8  *
9  */
10
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/cdev.h>
14 #include <linux/device.h>
15 #include <linux/eventfd.h>
16 #include <linux/slab.h>
17 #include <linux/wait.h>
18 #include <linux/dma-map-ops.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
21 #include <linux/uio.h>
22 #include <linux/vdpa.h>
23 #include <linux/nospec.h>
24 #include <linux/vmalloc.h>
25 #include <linux/sched/mm.h>
26 #include <uapi/linux/vduse.h>
27 #include <uapi/linux/vdpa.h>
28 #include <uapi/linux/virtio_config.h>
29 #include <uapi/linux/virtio_ids.h>
30 #include <uapi/linux/virtio_blk.h>
31 #include <linux/mod_devicetable.h>
32
33 #include "iova_domain.h"
34
35 #define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
36 #define DRV_DESC     "vDPA Device in Userspace"
37 #define DRV_LICENSE  "GPL v2"
38
39 #define VDUSE_DEV_MAX (1U << MINORBITS)
40 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
41 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
42 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
43 /* 128 MB reserved for virtqueue creation */
44 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
45 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
46
47 #define IRQ_UNBOUND -1
48
49 struct vduse_virtqueue {
50         u16 index;
51         u16 num_max;
52         u32 num;
53         u64 desc_addr;
54         u64 driver_addr;
55         u64 device_addr;
56         struct vdpa_vq_state state;
57         bool ready;
58         bool kicked;
59         spinlock_t kick_lock;
60         spinlock_t irq_lock;
61         struct eventfd_ctx *kickfd;
62         struct vdpa_callback cb;
63         struct work_struct inject;
64         struct work_struct kick;
65         int irq_effective_cpu;
66         struct cpumask irq_affinity;
67         struct kobject kobj;
68 };
69
70 struct vduse_dev;
71
72 struct vduse_vdpa {
73         struct vdpa_device vdpa;
74         struct vduse_dev *dev;
75 };
76
77 struct vduse_umem {
78         unsigned long iova;
79         unsigned long npages;
80         struct page **pages;
81         struct mm_struct *mm;
82 };
83
84 struct vduse_dev {
85         struct vduse_vdpa *vdev;
86         struct device *dev;
87         struct vduse_virtqueue **vqs;
88         struct vduse_iova_domain *domain;
89         char *name;
90         struct mutex lock;
91         spinlock_t msg_lock;
92         u64 msg_unique;
93         u32 msg_timeout;
94         wait_queue_head_t waitq;
95         struct list_head send_list;
96         struct list_head recv_list;
97         struct vdpa_callback config_cb;
98         struct work_struct inject;
99         spinlock_t irq_lock;
100         struct rw_semaphore rwsem;
101         int minor;
102         bool broken;
103         bool connected;
104         u64 api_version;
105         u64 device_features;
106         u64 driver_features;
107         u32 device_id;
108         u32 vendor_id;
109         u32 generation;
110         u32 config_size;
111         void *config;
112         u8 status;
113         u32 vq_num;
114         u32 vq_align;
115         struct vduse_umem *umem;
116         struct mutex mem_lock;
117         unsigned int bounce_size;
118         struct mutex domain_lock;
119 };
120
121 struct vduse_dev_msg {
122         struct vduse_dev_request req;
123         struct vduse_dev_response resp;
124         struct list_head list;
125         wait_queue_head_t waitq;
126         bool completed;
127 };
128
129 struct vduse_control {
130         u64 api_version;
131 };
132
133 static DEFINE_MUTEX(vduse_lock);
134 static DEFINE_IDR(vduse_idr);
135
136 static dev_t vduse_major;
137 static struct class *vduse_class;
138 static struct cdev vduse_ctrl_cdev;
139 static struct cdev vduse_cdev;
140 static struct workqueue_struct *vduse_irq_wq;
141 static struct workqueue_struct *vduse_irq_bound_wq;
142
143 static u32 allowed_device_id[] = {
144         VIRTIO_ID_BLOCK,
145 };
146
147 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
148 {
149         struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
150
151         return vdev->dev;
152 }
153
154 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
155 {
156         struct vdpa_device *vdpa = dev_to_vdpa(dev);
157
158         return vdpa_to_vduse(vdpa);
159 }
160
161 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
162                                             uint32_t request_id)
163 {
164         struct vduse_dev_msg *msg;
165
166         list_for_each_entry(msg, head, list) {
167                 if (msg->req.request_id == request_id) {
168                         list_del(&msg->list);
169                         return msg;
170                 }
171         }
172
173         return NULL;
174 }
175
176 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
177 {
178         struct vduse_dev_msg *msg = NULL;
179
180         if (!list_empty(head)) {
181                 msg = list_first_entry(head, struct vduse_dev_msg, list);
182                 list_del(&msg->list);
183         }
184
185         return msg;
186 }
187
188 static void vduse_enqueue_msg(struct list_head *head,
189                               struct vduse_dev_msg *msg)
190 {
191         list_add_tail(&msg->list, head);
192 }
193
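/*
 * Fail every in-flight request and mark the device as broken so that
 * later requests (and poll()) error out immediately. Called with
 * dev->msg_lock held.
 */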
194 static void vduse_dev_broken(struct vduse_dev *dev)
195 {
196         struct vduse_dev_msg *msg, *tmp;
197
198         if (unlikely(dev->broken))
199                 return;
200
201         list_splice_init(&dev->recv_list, &dev->send_list);
202         list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
203                 list_del(&msg->list);
204                 msg->completed = 1;
205                 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
206                 wake_up(&msg->waitq);
207         }
208         dev->broken = true;
209         wake_up(&dev->waitq);
210 }
211
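/*
 * Queue a request for the userspace server and wait (killably) for its
 * response. If msg_timeout is non-zero and expires, the device is marked
 * broken. Returns 0 on VDUSE_REQ_RESULT_OK, -EIO otherwise.
 */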
212 static int vduse_dev_msg_sync(struct vduse_dev *dev,
213                               struct vduse_dev_msg *msg)
214 {
215         int ret;
216
217         if (unlikely(dev->broken))
218                 return -EIO;
219
220         init_waitqueue_head(&msg->waitq);
221         spin_lock(&dev->msg_lock);
222         if (unlikely(dev->broken)) {
223                 spin_unlock(&dev->msg_lock);
224                 return -EIO;
225         }
226         msg->req.request_id = dev->msg_unique++;
227         vduse_enqueue_msg(&dev->send_list, msg);
228         wake_up(&dev->waitq);
229         spin_unlock(&dev->msg_lock);
230         if (dev->msg_timeout)
231                 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
232                                                   (long)dev->msg_timeout * HZ);
233         else
234                 ret = wait_event_killable(msg->waitq, msg->completed);
235
236         spin_lock(&dev->msg_lock);
237         if (!msg->completed) {
238                 list_del(&msg->list);
239                 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
240                 /* Mark the device as malfunctioning when there is a timeout */
241                 if (!ret)
242                         vduse_dev_broken(dev);
243         }
244         ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
245         spin_unlock(&dev->msg_lock);
246
247         return ret;
248 }
249
250 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
251                                          struct vduse_virtqueue *vq,
252                                          struct vdpa_vq_state_packed *packed)
253 {
254         struct vduse_dev_msg msg = { 0 };
255         int ret;
256
257         msg.req.type = VDUSE_GET_VQ_STATE;
258         msg.req.vq_state.index = vq->index;
259
260         ret = vduse_dev_msg_sync(dev, &msg);
261         if (ret)
262                 return ret;
263
264         packed->last_avail_counter =
265                         msg.resp.vq_state.packed.last_avail_counter & 0x0001;
266         packed->last_avail_idx =
267                         msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
268         packed->last_used_counter =
269                         msg.resp.vq_state.packed.last_used_counter & 0x0001;
270         packed->last_used_idx =
271                         msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
272
273         return 0;
274 }
275
276 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
277                                         struct vduse_virtqueue *vq,
278                                         struct vdpa_vq_state_split *split)
279 {
280         struct vduse_dev_msg msg = { 0 };
281         int ret;
282
283         msg.req.type = VDUSE_GET_VQ_STATE;
284         msg.req.vq_state.index = vq->index;
285
286         ret = vduse_dev_msg_sync(dev, &msg);
287         if (ret)
288                 return ret;
289
290         split->avail_index = msg.resp.vq_state.split.avail_index;
291
292         return 0;
293 }
294
295 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
296 {
297         struct vduse_dev_msg msg = { 0 };
298
299         msg.req.type = VDUSE_SET_STATUS;
300         msg.req.s.status = status;
301
302         return vduse_dev_msg_sync(dev, &msg);
303 }
304
305 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
306                                   u64 start, u64 last)
307 {
308         struct vduse_dev_msg msg = { 0 };
309
310         if (last < start)
311                 return -EINVAL;
312
313         msg.req.type = VDUSE_UPDATE_IOTLB;
314         msg.req.iova.start = start;
315         msg.req.iova.last = last;
316
317         return vduse_dev_msg_sync(dev, &msg);
318 }
319
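/*
 * read() handler for the per-device char dev: hand the next pending
 * request to the userspace server and park it on recv_list until the
 * matching response is written back. Non-blocking readers get -EAGAIN
 * when nothing is queued.
 */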
320 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
321 {
322         struct file *file = iocb->ki_filp;
323         struct vduse_dev *dev = file->private_data;
324         struct vduse_dev_msg *msg;
325         int size = sizeof(struct vduse_dev_request);
326         ssize_t ret;
327
328         if (iov_iter_count(to) < size)
329                 return -EINVAL;
330
331         spin_lock(&dev->msg_lock);
332         while (1) {
333                 msg = vduse_dequeue_msg(&dev->send_list);
334                 if (msg)
335                         break;
336
337                 ret = -EAGAIN;
338                 if (file->f_flags & O_NONBLOCK)
339                         goto unlock;
340
341                 spin_unlock(&dev->msg_lock);
342                 ret = wait_event_interruptible_exclusive(dev->waitq,
343                                         !list_empty(&dev->send_list));
344                 if (ret)
345                         return ret;
346
347                 spin_lock(&dev->msg_lock);
348         }
349         spin_unlock(&dev->msg_lock);
350         ret = copy_to_iter(&msg->req, size, to);
351         spin_lock(&dev->msg_lock);
352         if (ret != size) {
353                 ret = -EFAULT;
354                 vduse_enqueue_msg(&dev->send_list, msg);
355                 goto unlock;
356         }
357         vduse_enqueue_msg(&dev->recv_list, msg);
358 unlock:
359         spin_unlock(&dev->msg_lock);
360
361         return ret;
362 }
363
364 static bool is_mem_zero(const char *ptr, int size)
365 {
366         int i;
367
368         for (i = 0; i < size; i++) {
369                 if (ptr[i])
370                         return false;
371         }
372         return true;
373 }
374
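/*
 * write() handler: parse a vduse_dev_response, match it to the in-flight
 * request on recv_list by request_id and wake up the waiter.
 */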
375 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
376 {
377         struct file *file = iocb->ki_filp;
378         struct vduse_dev *dev = file->private_data;
379         struct vduse_dev_response resp;
380         struct vduse_dev_msg *msg;
381         size_t ret;
382
383         ret = copy_from_iter(&resp, sizeof(resp), from);
384         if (ret != sizeof(resp))
385                 return -EINVAL;
386
387         if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
388                 return -EINVAL;
389
390         spin_lock(&dev->msg_lock);
391         msg = vduse_find_msg(&dev->recv_list, resp.request_id);
392         if (!msg) {
393                 ret = -ENOENT;
394                 goto unlock;
395         }
396
397         memcpy(&msg->resp, &resp, sizeof(resp));
398         msg->completed = 1;
399         wake_up(&msg->waitq);
400 unlock:
401         spin_unlock(&dev->msg_lock);
402
403         return ret;
404 }
405
406 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
407 {
408         struct vduse_dev *dev = file->private_data;
409         __poll_t mask = 0;
410
411         poll_wait(file, &dev->waitq, wait);
412
413         spin_lock(&dev->msg_lock);
414
415         if (unlikely(dev->broken))
416                 mask |= EPOLLERR;
417         if (!list_empty(&dev->send_list))
418                 mask |= EPOLLIN | EPOLLRDNORM;
419         if (!list_empty(&dev->recv_list))
420                 mask |= EPOLLOUT | EPOLLWRNORM;
421
422         spin_unlock(&dev->msg_lock);
423
424         return mask;
425 }
426
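/*
 * Bring the device back to its initial state: reset the bounce map,
 * clear status/features/callbacks and reinitialize every virtqueue,
 * flushing any pending interrupt-injection work.
 */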
427 static void vduse_dev_reset(struct vduse_dev *dev)
428 {
429         int i;
430         struct vduse_iova_domain *domain = dev->domain;
431
432         /* The coherent mappings are handled in vduse_dev_free_coherent() */
433         if (domain && domain->bounce_map)
434                 vduse_domain_reset_bounce_map(domain);
435
436         down_write(&dev->rwsem);
437
438         dev->status = 0;
439         dev->driver_features = 0;
440         dev->generation++;
441         spin_lock(&dev->irq_lock);
442         dev->config_cb.callback = NULL;
443         dev->config_cb.private = NULL;
444         spin_unlock(&dev->irq_lock);
445         flush_work(&dev->inject);
446
447         for (i = 0; i < dev->vq_num; i++) {
448                 struct vduse_virtqueue *vq = dev->vqs[i];
449
450                 vq->ready = false;
451                 vq->desc_addr = 0;
452                 vq->driver_addr = 0;
453                 vq->device_addr = 0;
454                 vq->num = 0;
455                 memset(&vq->state, 0, sizeof(vq->state));
456
457                 spin_lock(&vq->kick_lock);
458                 vq->kicked = false;
459                 if (vq->kickfd)
460                         eventfd_ctx_put(vq->kickfd);
461                 vq->kickfd = NULL;
462                 spin_unlock(&vq->kick_lock);
463
464                 spin_lock(&vq->irq_lock);
465                 vq->cb.callback = NULL;
466                 vq->cb.private = NULL;
467                 vq->cb.trigger = NULL;
468                 spin_unlock(&vq->irq_lock);
469                 flush_work(&vq->inject);
470                 flush_work(&vq->kick);
471         }
472
473         up_write(&dev->rwsem);
474 }
475
476 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
477                                 u64 desc_area, u64 driver_area,
478                                 u64 device_area)
479 {
480         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
481         struct vduse_virtqueue *vq = dev->vqs[idx];
482
483         vq->desc_addr = desc_area;
484         vq->driver_addr = driver_area;
485         vq->device_addr = device_area;
486
487         return 0;
488 }
489
490 static void vduse_vq_kick(struct vduse_virtqueue *vq)
491 {
492         spin_lock(&vq->kick_lock);
493         if (!vq->ready)
494                 goto unlock;
495
496         if (vq->kickfd)
497                 eventfd_signal(vq->kickfd, 1);
498         else
499                 vq->kicked = true;
500 unlock:
501         spin_unlock(&vq->kick_lock);
502 }
503
504 static void vduse_vq_kick_work(struct work_struct *work)
505 {
506         struct vduse_virtqueue *vq = container_of(work,
507                                         struct vduse_virtqueue, kick);
508
509         vduse_vq_kick(vq);
510 }
511
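/*
 * Kick from the vDPA layer: signal the kick eventfd directly when the
 * current context allows it, otherwise defer the kick to a workqueue.
 */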
512 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
513 {
514         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
515         struct vduse_virtqueue *vq = dev->vqs[idx];
516
517         if (!eventfd_signal_allowed()) {
518                 schedule_work(&vq->kick);
519                 return;
520         }
521         vduse_vq_kick(vq);
522 }
523
524 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
525                               struct vdpa_callback *cb)
526 {
527         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
528         struct vduse_virtqueue *vq = dev->vqs[idx];
529
530         spin_lock(&vq->irq_lock);
531         vq->cb.callback = cb->callback;
532         vq->cb.private = cb->private;
533         vq->cb.trigger = cb->trigger;
534         spin_unlock(&vq->irq_lock);
535 }
536
537 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
538 {
539         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
540         struct vduse_virtqueue *vq = dev->vqs[idx];
541
542         vq->num = num;
543 }
544
545 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
546                                         u16 idx, bool ready)
547 {
548         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
549         struct vduse_virtqueue *vq = dev->vqs[idx];
550
551         vq->ready = ready;
552 }
553
554 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
555 {
556         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
557         struct vduse_virtqueue *vq = dev->vqs[idx];
558
559         return vq->ready;
560 }
561
562 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
563                                 const struct vdpa_vq_state *state)
564 {
565         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
566         struct vduse_virtqueue *vq = dev->vqs[idx];
567
568         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
569                 vq->state.packed.last_avail_counter =
570                                 state->packed.last_avail_counter;
571                 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
572                 vq->state.packed.last_used_counter =
573                                 state->packed.last_used_counter;
574                 vq->state.packed.last_used_idx = state->packed.last_used_idx;
575         } else
576                 vq->state.split.avail_index = state->split.avail_index;
577
578         return 0;
579 }
580
581 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
582                                 struct vdpa_vq_state *state)
583 {
584         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
585         struct vduse_virtqueue *vq = dev->vqs[idx];
586
587         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
588                 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
589
590         return vduse_dev_get_vq_state_split(dev, vq, &state->split);
591 }
592
593 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
594 {
595         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
596
597         return dev->vq_align;
598 }
599
600 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
601 {
602         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
603
604         return dev->device_features;
605 }
606
607 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
608 {
609         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
610
611         dev->driver_features = features;
612         return 0;
613 }
614
615 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
616 {
617         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
618
619         return dev->driver_features;
620 }
621
622 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
623                                   struct vdpa_callback *cb)
624 {
625         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
626
627         spin_lock(&dev->irq_lock);
628         dev->config_cb.callback = cb->callback;
629         dev->config_cb.private = cb->private;
630         spin_unlock(&dev->irq_lock);
631 }
632
633 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
634 {
635         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
636         u16 num_max = 0;
637         int i;
638
639         for (i = 0; i < dev->vq_num; i++)
640                 if (num_max < dev->vqs[i]->num_max)
641                         num_max = dev->vqs[i]->num_max;
642
643         return num_max;
644 }
645
646 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
647 {
648         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
649
650         return dev->device_id;
651 }
652
653 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
654 {
655         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
656
657         return dev->vendor_id;
658 }
659
660 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
661 {
662         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
663
664         return dev->status;
665 }
666
667 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
668 {
669         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
670
671         if (vduse_dev_set_status(dev, status))
672                 return;
673
674         dev->status = status;
675 }
676
677 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
678 {
679         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
680
681         return dev->config_size;
682 }
683
684 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
685                                   void *buf, unsigned int len)
686 {
687         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
688
689         /* Initialize the buffer in case of partial copy. */
690         memset(buf, 0, len);
691
692         if (offset > dev->config_size)
693                 return;
694
695         if (len > dev->config_size - offset)
696                 len = dev->config_size - offset;
697
698         memcpy(buf, dev->config + offset, len);
699 }
700
701 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
702                         const void *buf, unsigned int len)
703 {
704         /* Now we only support read-only configuration space */
705 }
706
707 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
708 {
709         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
710         int ret = vduse_dev_set_status(dev, 0);
711
712         vduse_dev_reset(dev);
713
714         return ret;
715 }
716
717 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
718 {
719         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
720
721         return dev->generation;
722 }
723
724 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
725                                       const struct cpumask *cpu_mask)
726 {
727         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
728
729         cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
730         return 0;
731 }
732
733 static const struct cpumask *
734 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
735 {
736         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
737
738         return &dev->vqs[idx]->irq_affinity;
739 }
740
741 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
742                                 unsigned int asid,
743                                 struct vhost_iotlb *iotlb)
744 {
745         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
746         int ret;
747
748         ret = vduse_domain_set_map(dev->domain, iotlb);
749         if (ret)
750                 return ret;
751
752         ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
753         if (ret) {
754                 vduse_domain_clear_map(dev->domain, iotlb);
755                 return ret;
756         }
757
758         return 0;
759 }
760
761 static void vduse_vdpa_free(struct vdpa_device *vdpa)
762 {
763         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
764
765         dev->vdev = NULL;
766 }
767
768 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
769         .set_vq_address         = vduse_vdpa_set_vq_address,
770         .kick_vq                = vduse_vdpa_kick_vq,
771         .set_vq_cb              = vduse_vdpa_set_vq_cb,
772         .set_vq_num             = vduse_vdpa_set_vq_num,
773         .set_vq_ready           = vduse_vdpa_set_vq_ready,
774         .get_vq_ready           = vduse_vdpa_get_vq_ready,
775         .set_vq_state           = vduse_vdpa_set_vq_state,
776         .get_vq_state           = vduse_vdpa_get_vq_state,
777         .get_vq_align           = vduse_vdpa_get_vq_align,
778         .get_device_features    = vduse_vdpa_get_device_features,
779         .set_driver_features    = vduse_vdpa_set_driver_features,
780         .get_driver_features    = vduse_vdpa_get_driver_features,
781         .set_config_cb          = vduse_vdpa_set_config_cb,
782         .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
783         .get_device_id          = vduse_vdpa_get_device_id,
784         .get_vendor_id          = vduse_vdpa_get_vendor_id,
785         .get_status             = vduse_vdpa_get_status,
786         .set_status             = vduse_vdpa_set_status,
787         .get_config_size        = vduse_vdpa_get_config_size,
788         .get_config             = vduse_vdpa_get_config,
789         .set_config             = vduse_vdpa_set_config,
790         .get_generation         = vduse_vdpa_get_generation,
791         .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
792         .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
793         .reset                  = vduse_vdpa_reset,
794         .set_map                = vduse_vdpa_set_map,
795         .free                   = vduse_vdpa_free,
796 };
797
798 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
799                                      unsigned long offset, size_t size,
800                                      enum dma_data_direction dir,
801                                      unsigned long attrs)
802 {
803         struct vduse_dev *vdev = dev_to_vduse(dev);
804         struct vduse_iova_domain *domain = vdev->domain;
805
806         return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
807 }
808
809 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
810                                 size_t size, enum dma_data_direction dir,
811                                 unsigned long attrs)
812 {
813         struct vduse_dev *vdev = dev_to_vduse(dev);
814         struct vduse_iova_domain *domain = vdev->domain;
815
816         return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
817 }
818
819 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
820                                         dma_addr_t *dma_addr, gfp_t flag,
821                                         unsigned long attrs)
822 {
823         struct vduse_dev *vdev = dev_to_vduse(dev);
824         struct vduse_iova_domain *domain = vdev->domain;
825         unsigned long iova;
826         void *addr;
827
828         *dma_addr = DMA_MAPPING_ERROR;
829         addr = vduse_domain_alloc_coherent(domain, size,
830                                 (dma_addr_t *)&iova, flag, attrs);
831         if (!addr)
832                 return NULL;
833
834         *dma_addr = (dma_addr_t)iova;
835
836         return addr;
837 }
838
839 static void vduse_dev_free_coherent(struct device *dev, size_t size,
840                                         void *vaddr, dma_addr_t dma_addr,
841                                         unsigned long attrs)
842 {
843         struct vduse_dev *vdev = dev_to_vduse(dev);
844         struct vduse_iova_domain *domain = vdev->domain;
845
846         vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
847 }
848
849 static size_t vduse_dev_max_mapping_size(struct device *dev)
850 {
851         struct vduse_dev *vdev = dev_to_vduse(dev);
852         struct vduse_iova_domain *domain = vdev->domain;
853
854         return domain->bounce_size;
855 }
856
857 static const struct dma_map_ops vduse_dev_dma_ops = {
858         .map_page = vduse_dev_map_page,
859         .unmap_page = vduse_dev_unmap_page,
860         .alloc = vduse_dev_alloc_coherent,
861         .free = vduse_dev_free_coherent,
862         .max_mapping_size = vduse_dev_max_mapping_size,
863 };
864
865 static unsigned int perm_to_file_flags(u8 perm)
866 {
867         unsigned int flags = 0;
868
869         switch (perm) {
870         case VDUSE_ACCESS_WO:
871                 flags |= O_WRONLY;
872                 break;
873         case VDUSE_ACCESS_RO:
874                 flags |= O_RDONLY;
875                 break;
876         case VDUSE_ACCESS_RW:
877                 flags |= O_RDWR;
878                 break;
879         default:
880                 WARN(1, "invalid vhost IOTLB permission\n");
881                 break;
882         }
883
884         return flags;
885 }
886
887 static int vduse_kickfd_setup(struct vduse_dev *dev,
888                         struct vduse_vq_eventfd *eventfd)
889 {
890         struct eventfd_ctx *ctx = NULL;
891         struct vduse_virtqueue *vq;
892         u32 index;
893
894         if (eventfd->index >= dev->vq_num)
895                 return -EINVAL;
896
897         index = array_index_nospec(eventfd->index, dev->vq_num);
898         vq = dev->vqs[index];
899         if (eventfd->fd >= 0) {
900                 ctx = eventfd_ctx_fdget(eventfd->fd);
901                 if (IS_ERR(ctx))
902                         return PTR_ERR(ctx);
903         } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
904                 return 0;
905
906         spin_lock(&vq->kick_lock);
907         if (vq->kickfd)
908                 eventfd_ctx_put(vq->kickfd);
909         vq->kickfd = ctx;
910         if (vq->ready && vq->kicked && vq->kickfd) {
911                 eventfd_signal(vq->kickfd, 1);
912                 vq->kicked = false;
913         }
914         spin_unlock(&vq->kick_lock);
915
916         return 0;
917 }
918
919 static bool vduse_dev_is_ready(struct vduse_dev *dev)
920 {
921         int i;
922
923         for (i = 0; i < dev->vq_num; i++)
924                 if (!dev->vqs[i]->num_max)
925                         return false;
926
927         return true;
928 }
929
930 static void vduse_dev_irq_inject(struct work_struct *work)
931 {
932         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
933
934         spin_lock_irq(&dev->irq_lock);
935         if (dev->config_cb.callback)
936                 dev->config_cb.callback(dev->config_cb.private);
937         spin_unlock_irq(&dev->irq_lock);
938 }
939
940 static void vduse_vq_irq_inject(struct work_struct *work)
941 {
942         struct vduse_virtqueue *vq = container_of(work,
943                                         struct vduse_virtqueue, inject);
944
945         spin_lock_irq(&vq->irq_lock);
946         if (vq->ready && vq->cb.callback)
947                 vq->cb.callback(vq->cb.private);
948         spin_unlock_irq(&vq->irq_lock);
949 }
950
951 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
952 {
953         bool signal = false;
954
955         if (!vq->cb.trigger)
956                 return false;
957
958         spin_lock_irq(&vq->irq_lock);
959         if (vq->ready && vq->cb.trigger) {
960                 eventfd_signal(vq->cb.trigger, 1);
961                 signal = true;
962         }
963         spin_unlock_irq(&vq->irq_lock);
964
965         return signal;
966 }
967
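/*
 * Inject a config or virtqueue interrupt by queueing the work item on the
 * unbound irq workqueue, or on the bound workqueue pinned to the chosen
 * CPU. Rejected until the driver has set DRIVER_OK.
 */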
968 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
969                                     struct work_struct *irq_work,
970                                     int irq_effective_cpu)
971 {
972         int ret = -EINVAL;
973
974         down_read(&dev->rwsem);
975         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
976                 goto unlock;
977
978         ret = 0;
979         if (irq_effective_cpu == IRQ_UNBOUND)
980                 queue_work(vduse_irq_wq, irq_work);
981         else
982                 queue_work_on(irq_effective_cpu,
983                               vduse_irq_bound_wq, irq_work);
984 unlock:
985         up_read(&dev->rwsem);
986
987         return ret;
988 }
989
990 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
991                                 u64 iova, u64 size)
992 {
993         int ret;
994
995         mutex_lock(&dev->mem_lock);
996         ret = -ENOENT;
997         if (!dev->umem)
998                 goto unlock;
999
1000         ret = -EINVAL;
1001         if (!dev->domain)
1002                 goto unlock;
1003
1004         if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1005                 goto unlock;
1006
1007         vduse_domain_remove_user_bounce_pages(dev->domain);
1008         unpin_user_pages_dirty_lock(dev->umem->pages,
1009                                     dev->umem->npages, true);
1010         atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1011         mmdrop(dev->umem->mm);
1012         vfree(dev->umem->pages);
1013         kfree(dev->umem);
1014         dev->umem = NULL;
1015         ret = 0;
1016 unlock:
1017         mutex_unlock(&dev->mem_lock);
1018         return ret;
1019 }
1020
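/*
 * Register userspace memory as the backing for the bounce buffer region.
 * The range must cover exactly the bounce area starting at IOVA 0, the
 * pages are pinned and accounted against RLIMIT_MEMLOCK, and only one
 * registration may exist at a time.
 */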
1021 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1022                               u64 iova, u64 uaddr, u64 size)
1023 {
1024         struct page **page_list = NULL;
1025         struct vduse_umem *umem = NULL;
1026         long pinned = 0;
1027         unsigned long npages, lock_limit;
1028         int ret;
1029
1030         if (!dev->domain || !dev->domain->bounce_map ||
1031             size != dev->domain->bounce_size ||
1032             iova != 0 || uaddr & ~PAGE_MASK)
1033                 return -EINVAL;
1034
1035         mutex_lock(&dev->mem_lock);
1036         ret = -EEXIST;
1037         if (dev->umem)
1038                 goto unlock;
1039
1040         ret = -ENOMEM;
1041         npages = size >> PAGE_SHIFT;
1042         page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1043                               GFP_KERNEL_ACCOUNT);
1044         umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1045         if (!page_list || !umem)
1046                 goto unlock;
1047
1048         mmap_read_lock(current->mm);
1049
1050         lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1051         if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1052                 goto out;
1053
1054         pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1055                                 page_list, NULL);
1056         if (pinned != npages) {
1057                 ret = pinned < 0 ? pinned : -ENOMEM;
1058                 goto out;
1059         }
1060
1061         ret = vduse_domain_add_user_bounce_pages(dev->domain,
1062                                                  page_list, pinned);
1063         if (ret)
1064                 goto out;
1065
1066         atomic64_add(npages, &current->mm->pinned_vm);
1067
1068         umem->pages = page_list;
1069         umem->npages = pinned;
1070         umem->iova = iova;
1071         umem->mm = current->mm;
1072         mmgrab(current->mm);
1073
1074         dev->umem = umem;
1075 out:
1076         if (ret && pinned > 0)
1077                 unpin_user_pages(page_list, pinned);
1078
1079         mmap_read_unlock(current->mm);
1080 unlock:
1081         if (ret) {
1082                 vfree(page_list);
1083                 kfree(umem);
1084         }
1085         mutex_unlock(&dev->mem_lock);
1086         return ret;
1087 }
1088
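/*
 * Pick the next online CPU from the virtqueue's irq affinity mask in a
 * round-robin fashion; the result is used to bind the next interrupt
 * injection to that CPU.
 */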
1089 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1090 {
1091         int curr_cpu = vq->irq_effective_cpu;
1092
1093         while (true) {
1094                 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1095                 if (cpu_online(curr_cpu))
1096                         break;
1097
1098                 if (curr_cpu >= nr_cpu_ids)
1099                         curr_cpu = IRQ_UNBOUND;
1100         }
1101
1102         vq->irq_effective_cpu = curr_cpu;
1103 }
1104
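/*
 * ioctl interface used by the userspace server on the per-device node
 * (typically /dev/vduse/<name>): export IOTLB fds, query negotiated
 * features, update the read-only config space, configure virtqueues and
 * kick eventfds, inject interrupts and (de)register bounce memory.
 */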
1105 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1106                             unsigned long arg)
1107 {
1108         struct vduse_dev *dev = file->private_data;
1109         void __user *argp = (void __user *)arg;
1110         int ret;
1111
1112         if (unlikely(dev->broken))
1113                 return -EPERM;
1114
1115         switch (cmd) {
1116         case VDUSE_IOTLB_GET_FD: {
1117                 struct vduse_iotlb_entry entry;
1118                 struct vhost_iotlb_map *map;
1119                 struct vdpa_map_file *map_file;
1120                 struct file *f = NULL;
1121
1122                 ret = -EFAULT;
1123                 if (copy_from_user(&entry, argp, sizeof(entry)))
1124                         break;
1125
1126                 ret = -EINVAL;
1127                 if (entry.start > entry.last)
1128                         break;
1129
1130                 mutex_lock(&dev->domain_lock);
1131                 if (!dev->domain) {
1132                         mutex_unlock(&dev->domain_lock);
1133                         break;
1134                 }
1135                 spin_lock(&dev->domain->iotlb_lock);
1136                 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1137                                               entry.start, entry.last);
1138                 if (map) {
1139                         map_file = (struct vdpa_map_file *)map->opaque;
1140                         f = get_file(map_file->file);
1141                         entry.offset = map_file->offset;
1142                         entry.start = map->start;
1143                         entry.last = map->last;
1144                         entry.perm = map->perm;
1145                 }
1146                 spin_unlock(&dev->domain->iotlb_lock);
1147                 mutex_unlock(&dev->domain_lock);
1148                 ret = -EINVAL;
1149                 if (!f)
1150                         break;
1151
1152                 ret = -EFAULT;
1153                 if (copy_to_user(argp, &entry, sizeof(entry))) {
1154                         fput(f);
1155                         break;
1156                 }
1157                 ret = receive_fd(f, perm_to_file_flags(entry.perm));
1158                 fput(f);
1159                 break;
1160         }
1161         case VDUSE_DEV_GET_FEATURES:
1162                 /*
1163                  * Just mirror what the driver wrote here.
1164                  * The driver is expected to check FEATURES_OK later.
1165                  */
1166                 ret = put_user(dev->driver_features, (u64 __user *)argp);
1167                 break;
1168         case VDUSE_DEV_SET_CONFIG: {
1169                 struct vduse_config_data config;
1170                 unsigned long size = offsetof(struct vduse_config_data,
1171                                               buffer);
1172
1173                 ret = -EFAULT;
1174                 if (copy_from_user(&config, argp, size))
1175                         break;
1176
1177                 ret = -EINVAL;
1178                 if (config.offset > dev->config_size ||
1179                     config.length == 0 ||
1180                     config.length > dev->config_size - config.offset)
1181                         break;
1182
1183                 ret = -EFAULT;
1184                 if (copy_from_user(dev->config + config.offset, argp + size,
1185                                    config.length))
1186                         break;
1187
1188                 ret = 0;
1189                 break;
1190         }
1191         case VDUSE_DEV_INJECT_CONFIG_IRQ:
1192                 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1193                 break;
1194         case VDUSE_VQ_SETUP: {
1195                 struct vduse_vq_config config;
1196                 u32 index;
1197
1198                 ret = -EFAULT;
1199                 if (copy_from_user(&config, argp, sizeof(config)))
1200                         break;
1201
1202                 ret = -EINVAL;
1203                 if (config.index >= dev->vq_num)
1204                         break;
1205
1206                 if (!is_mem_zero((const char *)config.reserved,
1207                                  sizeof(config.reserved)))
1208                         break;
1209
1210                 index = array_index_nospec(config.index, dev->vq_num);
1211                 dev->vqs[index]->num_max = config.max_size;
1212                 ret = 0;
1213                 break;
1214         }
1215         case VDUSE_VQ_GET_INFO: {
1216                 struct vduse_vq_info vq_info;
1217                 struct vduse_virtqueue *vq;
1218                 u32 index;
1219
1220                 ret = -EFAULT;
1221                 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1222                         break;
1223
1224                 ret = -EINVAL;
1225                 if (vq_info.index >= dev->vq_num)
1226                         break;
1227
1228                 index = array_index_nospec(vq_info.index, dev->vq_num);
1229                 vq = dev->vqs[index];
1230                 vq_info.desc_addr = vq->desc_addr;
1231                 vq_info.driver_addr = vq->driver_addr;
1232                 vq_info.device_addr = vq->device_addr;
1233                 vq_info.num = vq->num;
1234
1235                 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1236                         vq_info.packed.last_avail_counter =
1237                                 vq->state.packed.last_avail_counter;
1238                         vq_info.packed.last_avail_idx =
1239                                 vq->state.packed.last_avail_idx;
1240                         vq_info.packed.last_used_counter =
1241                                 vq->state.packed.last_used_counter;
1242                         vq_info.packed.last_used_idx =
1243                                 vq->state.packed.last_used_idx;
1244                 } else
1245                         vq_info.split.avail_index =
1246                                 vq->state.split.avail_index;
1247
1248                 vq_info.ready = vq->ready;
1249
1250                 ret = -EFAULT;
1251                 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1252                         break;
1253
1254                 ret = 0;
1255                 break;
1256         }
1257         case VDUSE_VQ_SETUP_KICKFD: {
1258                 struct vduse_vq_eventfd eventfd;
1259
1260                 ret = -EFAULT;
1261                 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1262                         break;
1263
1264                 ret = vduse_kickfd_setup(dev, &eventfd);
1265                 break;
1266         }
1267         case VDUSE_VQ_INJECT_IRQ: {
1268                 u32 index;
1269
1270                 ret = -EFAULT;
1271                 if (get_user(index, (u32 __user *)argp))
1272                         break;
1273
1274                 ret = -EINVAL;
1275                 if (index >= dev->vq_num)
1276                         break;
1277
1278                 ret = 0;
1279                 index = array_index_nospec(index, dev->vq_num);
1280                 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1281                         vduse_vq_update_effective_cpu(dev->vqs[index]);
1282                         ret = vduse_dev_queue_irq_work(dev,
1283                                                 &dev->vqs[index]->inject,
1284                                                 dev->vqs[index]->irq_effective_cpu);
1285                 }
1286                 break;
1287         }
1288         case VDUSE_IOTLB_REG_UMEM: {
1289                 struct vduse_iova_umem umem;
1290
1291                 ret = -EFAULT;
1292                 if (copy_from_user(&umem, argp, sizeof(umem)))
1293                         break;
1294
1295                 ret = -EINVAL;
1296                 if (!is_mem_zero((const char *)umem.reserved,
1297                                  sizeof(umem.reserved)))
1298                         break;
1299
1300                 mutex_lock(&dev->domain_lock);
1301                 ret = vduse_dev_reg_umem(dev, umem.iova,
1302                                          umem.uaddr, umem.size);
1303                 mutex_unlock(&dev->domain_lock);
1304                 break;
1305         }
1306         case VDUSE_IOTLB_DEREG_UMEM: {
1307                 struct vduse_iova_umem umem;
1308
1309                 ret = -EFAULT;
1310                 if (copy_from_user(&umem, argp, sizeof(umem)))
1311                         break;
1312
1313                 ret = -EINVAL;
1314                 if (!is_mem_zero((const char *)umem.reserved,
1315                                  sizeof(umem.reserved)))
1316                         break;
1317                 mutex_lock(&dev->domain_lock);
1318                 ret = vduse_dev_dereg_umem(dev, umem.iova,
1319                                            umem.size);
1320                 mutex_unlock(&dev->domain_lock);
1321                 break;
1322         }
1323         case VDUSE_IOTLB_GET_INFO: {
1324                 struct vduse_iova_info info;
1325                 struct vhost_iotlb_map *map;
1326
1327                 ret = -EFAULT;
1328                 if (copy_from_user(&info, argp, sizeof(info)))
1329                         break;
1330
1331                 ret = -EINVAL;
1332                 if (info.start > info.last)
1333                         break;
1334
1335                 if (!is_mem_zero((const char *)info.reserved,
1336                                  sizeof(info.reserved)))
1337                         break;
1338
1339                 mutex_lock(&dev->domain_lock);
1340                 if (!dev->domain) {
1341                         mutex_unlock(&dev->domain_lock);
1342                         break;
1343                 }
1344                 spin_lock(&dev->domain->iotlb_lock);
1345                 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1346                                               info.start, info.last);
1347                 if (map) {
1348                         info.start = map->start;
1349                         info.last = map->last;
1350                         info.capability = 0;
1351                         if (dev->domain->bounce_map && map->start == 0 &&
1352                             map->last == dev->domain->bounce_size - 1)
1353                                 info.capability |= VDUSE_IOVA_CAP_UMEM;
1354                 }
1355                 spin_unlock(&dev->domain->iotlb_lock);
1356                 mutex_unlock(&dev->domain_lock);
1357                 if (!map)
1358                         break;
1359
1360                 ret = -EFAULT;
1361                 if (copy_to_user(argp, &info, sizeof(info)))
1362                         break;
1363
1364                 ret = 0;
1365                 break;
1366         }
1367         default:
1368                 ret = -ENOIOCTLCMD;
1369                 break;
1370         }
1371
1372         return ret;
1373 }
1374
1375 static int vduse_dev_release(struct inode *inode, struct file *file)
1376 {
1377         struct vduse_dev *dev = file->private_data;
1378
1379         mutex_lock(&dev->domain_lock);
1380         if (dev->domain)
1381                 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1382         mutex_unlock(&dev->domain_lock);
1383         spin_lock(&dev->msg_lock);
1384         /* Make sure the inflight messages can be processed after reconnection */
1385         list_splice_init(&dev->recv_list, &dev->send_list);
1386         spin_unlock(&dev->msg_lock);
1387         dev->connected = false;
1388
1389         return 0;
1390 }
1391
1392 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1393 {
1394         struct vduse_dev *dev;
1395
1396         mutex_lock(&vduse_lock);
1397         dev = idr_find(&vduse_idr, minor);
1398         mutex_unlock(&vduse_lock);
1399
1400         return dev;
1401 }
1402
1403 static int vduse_dev_open(struct inode *inode, struct file *file)
1404 {
1405         int ret;
1406         struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1407
1408         if (!dev)
1409                 return -ENODEV;
1410
1411         ret = -EBUSY;
1412         mutex_lock(&dev->lock);
1413         if (dev->connected)
1414                 goto unlock;
1415
1416         ret = 0;
1417         dev->connected = true;
1418         file->private_data = dev;
1419 unlock:
1420         mutex_unlock(&dev->lock);
1421
1422         return ret;
1423 }
1424
1425 static const struct file_operations vduse_dev_fops = {
1426         .owner          = THIS_MODULE,
1427         .open           = vduse_dev_open,
1428         .release        = vduse_dev_release,
1429         .read_iter      = vduse_dev_read_iter,
1430         .write_iter     = vduse_dev_write_iter,
1431         .poll           = vduse_dev_poll,
1432         .unlocked_ioctl = vduse_dev_ioctl,
1433         .compat_ioctl   = compat_ptr_ioctl,
1434         .llseek         = noop_llseek,
1435 };
1436
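/*
 * Per-virtqueue sysfs attribute controlling which CPUs callback injection
 * may be bound to. With the usual class/device layout this would be
 * reachable at something like
 * /sys/class/vduse/<name>/vq<N>/irq_cb_affinity, e.g.
 * "echo 2-3 > .../vq0/irq_cb_affinity" (the exact path depends on how the
 * class device is registered).
 */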
1437 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1438 {
1439         return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1440 }
1441
1442 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1443                                      const char *buf, size_t count)
1444 {
1445         cpumask_var_t new_value;
1446         int ret;
1447
1448         if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1449                 return -ENOMEM;
1450
1451         ret = cpumask_parse(buf, new_value);
1452         if (ret)
1453                 goto free_mask;
1454
1455         ret = -EINVAL;
1456         if (!cpumask_intersects(new_value, cpu_online_mask))
1457                 goto free_mask;
1458
1459         cpumask_copy(&vq->irq_affinity, new_value);
1460         ret = count;
1461 free_mask:
1462         free_cpumask_var(new_value);
1463         return ret;
1464 }
1465
1466 struct vq_sysfs_entry {
1467         struct attribute attr;
1468         ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1469         ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1470                          size_t count);
1471 };
1472
1473 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1474
1475 static struct attribute *vq_attrs[] = {
1476         &irq_cb_affinity_attr.attr,
1477         NULL,
1478 };
1479 ATTRIBUTE_GROUPS(vq);
1480
1481 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1482                             char *buf)
1483 {
1484         struct vduse_virtqueue *vq = container_of(kobj,
1485                                         struct vduse_virtqueue, kobj);
1486         struct vq_sysfs_entry *entry = container_of(attr,
1487                                         struct vq_sysfs_entry, attr);
1488
1489         if (!entry->show)
1490                 return -EIO;
1491
1492         return entry->show(vq, buf);
1493 }
1494
1495 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1496                              const char *buf, size_t count)
1497 {
1498         struct vduse_virtqueue *vq = container_of(kobj,
1499                                         struct vduse_virtqueue, kobj);
1500         struct vq_sysfs_entry *entry = container_of(attr,
1501                                         struct vq_sysfs_entry, attr);
1502
1503         if (!entry->store)
1504                 return -EIO;
1505
1506         return entry->store(vq, buf, count);
1507 }
1508
1509 static const struct sysfs_ops vq_sysfs_ops = {
1510         .show = vq_attr_show,
1511         .store = vq_attr_store,
1512 };
1513
1514 static void vq_release(struct kobject *kobj)
1515 {
1516         struct vduse_virtqueue *vq = container_of(kobj,
1517                                         struct vduse_virtqueue, kobj);
1518         kfree(vq);
1519 }
1520
1521 static const struct kobj_type vq_type = {
1522         .release        = vq_release,
1523         .sysfs_ops      = &vq_sysfs_ops,
1524         .default_groups = vq_groups,
1525 };
1526
1527 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1528 {
1529         int i;
1530
1531         if (!dev->vqs)
1532                 return;
1533
1534         for (i = 0; i < dev->vq_num; i++)
1535                 kobject_put(&dev->vqs[i]->kobj);
1536         kfree(dev->vqs);
1537 }
1538
1539 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1540 {
1541         int ret, i;
1542
1543         dev->vq_align = vq_align;
1544         dev->vq_num = vq_num;
1545         dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1546         if (!dev->vqs)
1547                 return -ENOMEM;
1548
1549         for (i = 0; i < vq_num; i++) {
1550                 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1551                 if (!dev->vqs[i]) {
1552                         ret = -ENOMEM;
1553                         goto err;
1554                 }
1555
1556                 dev->vqs[i]->index = i;
1557                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1558                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1559                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1560                 spin_lock_init(&dev->vqs[i]->kick_lock);
1561                 spin_lock_init(&dev->vqs[i]->irq_lock);
1562                 cpumask_setall(&dev->vqs[i]->irq_affinity);
1563
1564                 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1565                 ret = kobject_add(&dev->vqs[i]->kobj,
1566                                   &dev->dev->kobj, "vq%d", i);
1567                 if (ret) {
1568                         kfree(dev->vqs[i]);
1569                         goto err;
1570                 }
1571         }
1572
1573         return 0;
1574 err:
1575         while (i--)
1576                 kobject_put(&dev->vqs[i]->kobj);
1577         kfree(dev->vqs);
1578         dev->vqs = NULL;
1579         return ret;
1580 }
1581
1582 static struct vduse_dev *vduse_dev_create(void)
1583 {
1584         struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1585
1586         if (!dev)
1587                 return NULL;
1588
1589         mutex_init(&dev->lock);
1590         mutex_init(&dev->mem_lock);
1591         mutex_init(&dev->domain_lock);
1592         spin_lock_init(&dev->msg_lock);
1593         INIT_LIST_HEAD(&dev->send_list);
1594         INIT_LIST_HEAD(&dev->recv_list);
1595         spin_lock_init(&dev->irq_lock);
1596         init_rwsem(&dev->rwsem);
1597
1598         INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1599         init_waitqueue_head(&dev->waitq);
1600
1601         return dev;
1602 }
1603
1604 static void vduse_dev_destroy(struct vduse_dev *dev)
1605 {
1606         kfree(dev);
1607 }
1608
1609 static struct vduse_dev *vduse_find_dev(const char *name)
1610 {
1611         struct vduse_dev *dev;
1612         int id;
1613
1614         idr_for_each_entry(&vduse_idr, dev, id)
1615                 if (!strcmp(dev->name, name))
1616                         return dev;
1617
1618         return NULL;
1619 }
1620
1621 static int vduse_destroy_dev(char *name)
1622 {
1623         struct vduse_dev *dev = vduse_find_dev(name);
1624
1625         if (!dev)
1626                 return -EINVAL;
1627
1628         mutex_lock(&dev->lock);
1629         if (dev->vdev || dev->connected) {
1630                 mutex_unlock(&dev->lock);
1631                 return -EBUSY;
1632         }
1633         dev->connected = true;
1634         mutex_unlock(&dev->lock);
1635
1636         vduse_dev_reset(dev);
1637         device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1638         idr_remove(&vduse_idr, dev->minor);
1639         kvfree(dev->config);
1640         vduse_dev_deinit_vqs(dev);
1641         if (dev->domain)
1642                 vduse_domain_destroy(dev->domain);
1643         kfree(dev->name);
1644         vduse_dev_destroy(dev);
1645         module_put(THIS_MODULE);
1646
1647         return 0;
1648 }
1649
1650 static bool device_is_allowed(u32 device_id)
1651 {
1652         int i;
1653
1654         for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1655                 if (allowed_device_id[i] == device_id)
1656                         return true;
1657
1658         return false;
1659 }
1660
1661 static bool features_is_valid(u64 features)
1662 {
1663         if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
1664                 return false;
1665
1666                 /* Only read-only config space is supported for now */
1667         if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
1668                 return false;
1669
1670         return true;
1671 }
1672
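     /*
      * Sanity-check a VDUSE_CREATE_DEV request: reserved bytes must be
      * zero, vq_align and config_size may not exceed PAGE_SIZE, vq_num is
      * capped at 64K, the name must be non-empty, and the device type and
      * feature bits must be acceptable.
      */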
1673 static bool vduse_validate_config(struct vduse_dev_config *config)
1674 {
1675         if (!is_mem_zero((const char *)config->reserved,
1676                          sizeof(config->reserved)))
1677                 return false;
1678
1679         if (config->vq_align > PAGE_SIZE)
1680                 return false;
1681
1682         if (config->config_size > PAGE_SIZE)
1683                 return false;
1684
1685         if (config->vq_num > 0xffff)
1686                 return false;
1687
1688         if (!config->name[0])
1689                 return false;
1690
1691         if (!device_is_allowed(config->device_id))
1692                 return false;
1693
1694         if (!features_is_valid(config->features))
1695                 return false;
1696
1697         return true;
1698 }
1699
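     /* Sysfs knob: how long (in seconds) to wait for userspace to answer a control message. */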
1700 static ssize_t msg_timeout_show(struct device *device,
1701                                 struct device_attribute *attr, char *buf)
1702 {
1703         struct vduse_dev *dev = dev_get_drvdata(device);
1704
1705         return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1706 }
1707
1708 static ssize_t msg_timeout_store(struct device *device,
1709                                  struct device_attribute *attr,
1710                                  const char *buf, size_t count)
1711 {
1712         struct vduse_dev *dev = dev_get_drvdata(device);
1713         int ret;
1714
1715         ret = kstrtouint(buf, 10, &dev->msg_timeout);
1716         if (ret < 0)
1717                 return ret;
1718
1719         return count;
1720 }
1721
1722 static DEVICE_ATTR_RW(msg_timeout);
1723
1724 static ssize_t bounce_size_show(struct device *device,
1725                                 struct device_attribute *attr, char *buf)
1726 {
1727         struct vduse_dev *dev = dev_get_drvdata(device);
1728
1729         return sysfs_emit(buf, "%u\n", dev->bounce_size);
1730 }
1731
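     /*
      * The bounce buffer size may only be changed before the IOVA domain
      * exists (i.e. before the device is first attached to the vDPA bus);
      * the value must lie within [VDUSE_MIN_BOUNCE_SIZE,
      * VDUSE_MAX_BOUNCE_SIZE] and is rounded down to a page boundary.
      */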
1732 static ssize_t bounce_size_store(struct device *device,
1733                                  struct device_attribute *attr,
1734                                  const char *buf, size_t count)
1735 {
1736         struct vduse_dev *dev = dev_get_drvdata(device);
1737         unsigned int bounce_size;
1738         int ret;
1739
1740         ret = -EPERM;
1741         mutex_lock(&dev->domain_lock);
1742         if (dev->domain)
1743                 goto unlock;
1744
1745         ret = kstrtouint(buf, 10, &bounce_size);
1746         if (ret < 0)
1747                 goto unlock;
1748
1749         ret = -EINVAL;
1750         if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1751             bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1752                 goto unlock;
1753
1754         dev->bounce_size = bounce_size & PAGE_MASK;
1755         ret = count;
1756 unlock:
1757         mutex_unlock(&dev->domain_lock);
1758         return ret;
1759 }
1760
1761 static DEVICE_ATTR_RW(bounce_size);
1762
1763 static struct attribute *vduse_dev_attrs[] = {
1764         &dev_attr_msg_timeout.attr,
1765         &dev_attr_bounce_size.attr,
1766         NULL
1767 };
1768
1769 ATTRIBUTE_GROUPS(vduse_dev);
1770
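     /*
      * Create a VDUSE device from a validated config: allocate the device
      * structure, reserve a minor in the IDR, create the /dev/vduse/<name>
      * node with its sysfs attributes and set up the virtqueues. Called
      * with vduse_lock held.
      */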
1771 static int vduse_create_dev(struct vduse_dev_config *config,
1772                             void *config_buf, u64 api_version)
1773 {
1774         int ret;
1775         struct vduse_dev *dev;
1776
1777         ret = -EEXIST;
1778         if (vduse_find_dev(config->name))
1779                 goto err;
1780
1781         ret = -ENOMEM;
1782         dev = vduse_dev_create();
1783         if (!dev)
1784                 goto err;
1785
1786         dev->api_version = api_version;
1787         dev->device_features = config->features;
1788         dev->device_id = config->device_id;
1789         dev->vendor_id = config->vendor_id;
1790         dev->name = kstrdup(config->name, GFP_KERNEL);
1791         if (!dev->name)
1792                 goto err_str;
1793
1794         dev->bounce_size = VDUSE_BOUNCE_SIZE;
1795         dev->config = config_buf;
1796         dev->config_size = config->config_size;
1797
1798         ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1799         if (ret < 0)
1800                 goto err_idr;
1801
1802         dev->minor = ret;
1803         dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1804         dev->dev = device_create_with_groups(vduse_class, NULL,
1805                                 MKDEV(MAJOR(vduse_major), dev->minor),
1806                                 dev, vduse_dev_groups, "%s", config->name);
1807         if (IS_ERR(dev->dev)) {
1808                 ret = PTR_ERR(dev->dev);
1809                 goto err_dev;
1810         }
1811
1812         ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1813         if (ret)
1814                 goto err_vqs;
1815
1816         __module_get(THIS_MODULE);
1817
1818         return 0;
1819 err_vqs:
1820         device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1821 err_dev:
1822         idr_remove(&vduse_idr, dev->minor);
1823 err_idr:
1824         kfree(dev->name);
1825 err_str:
1826         vduse_dev_destroy(dev);
1827 err:
1828         return ret;
1829 }
1830
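     /*
      * ioctls on the control node (/dev/vduse/control): negotiate the API
      * version and create or destroy VDUSE devices. All commands are
      * serialized by vduse_lock.
      */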
1831 static long vduse_ioctl(struct file *file, unsigned int cmd,
1832                         unsigned long arg)
1833 {
1834         int ret;
1835         void __user *argp = (void __user *)arg;
1836         struct vduse_control *control = file->private_data;
1837
1838         mutex_lock(&vduse_lock);
1839         switch (cmd) {
1840         case VDUSE_GET_API_VERSION:
1841                 ret = put_user(control->api_version, (u64 __user *)argp);
1842                 break;
1843         case VDUSE_SET_API_VERSION: {
1844                 u64 api_version;
1845
1846                 ret = -EFAULT;
1847                 if (get_user(api_version, (u64 __user *)argp))
1848                         break;
1849
1850                 ret = -EINVAL;
1851                 if (api_version > VDUSE_API_VERSION)
1852                         break;
1853
1854                 ret = 0;
1855                 control->api_version = api_version;
1856                 break;
1857         }
1858         case VDUSE_CREATE_DEV: {
1859                 struct vduse_dev_config config;
1860                 unsigned long size = offsetof(struct vduse_dev_config, config);
1861                 void *buf;
1862
1863                 ret = -EFAULT;
1864                 if (copy_from_user(&config, argp, size))
1865                         break;
1866
1867                 ret = -EINVAL;
1868                 if (!vduse_validate_config(&config))
1869                         break;
1870
1871                 buf = vmemdup_user(argp + size, config.config_size);
1872                 if (IS_ERR(buf)) {
1873                         ret = PTR_ERR(buf);
1874                         break;
1875                 }
1876                 config.name[VDUSE_NAME_MAX - 1] = '\0';
1877                 ret = vduse_create_dev(&config, buf, control->api_version);
1878                 if (ret)
1879                         kvfree(buf);
1880                 break;
1881         }
1882         case VDUSE_DESTROY_DEV: {
1883                 char name[VDUSE_NAME_MAX];
1884
1885                 ret = -EFAULT;
1886                 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1887                         break;
1888
1889                 name[VDUSE_NAME_MAX - 1] = '\0';
1890                 ret = vduse_destroy_dev(name);
1891                 break;
1892         }
1893         default:
1894                 ret = -EINVAL;
1895                 break;
1896         }
1897         mutex_unlock(&vduse_lock);
1898
1899         return ret;
1900 }
1901
1902 static int vduse_release(struct inode *inode, struct file *file)
1903 {
1904         struct vduse_control *control = file->private_data;
1905
1906         kfree(control);
1907         return 0;
1908 }
1909
1910 static int vduse_open(struct inode *inode, struct file *file)
1911 {
1912         struct vduse_control *control;
1913
1914         control = kmalloc(sizeof(*control), GFP_KERNEL);
1915         if (!control)
1916                 return -ENOMEM;
1917
1918         control->api_version = VDUSE_API_VERSION;
1919         file->private_data = control;
1920
1921         return 0;
1922 }
1923
1924 static const struct file_operations vduse_ctrl_fops = {
1925         .owner          = THIS_MODULE,
1926         .open           = vduse_open,
1927         .release        = vduse_release,
1928         .unlocked_ioctl = vduse_ioctl,
1929         .compat_ioctl   = compat_ptr_ioctl,
1930         .llseek         = noop_llseek,
1931 };
1932
1933 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1934 {
1935         return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1936 }
1937
1938 struct vduse_mgmt_dev {
1939         struct vdpa_mgmt_dev mgmt_dev;
1940         struct device dev;
1941 };
1942
1943 static struct vduse_mgmt_dev *vduse_mgmt;
1944
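     /*
      * Allocate the vdpa_device wrapper for a VDUSE device and set its
      * DMA mask and the VDUSE-specific DMA ops so that mappings go
      * through the userspace-visible IOVA domain.
      */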
1945 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1946 {
1947         struct vduse_vdpa *vdev;
1948         int ret;
1949
1950         if (dev->vdev)
1951                 return -EEXIST;
1952
1953         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1954                                  &vduse_vdpa_config_ops, 1, 1, name, true);
1955         if (IS_ERR(vdev))
1956                 return PTR_ERR(vdev);
1957
1958         dev->vdev = vdev;
1959         vdev->dev = dev;
1960         vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1961         ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1962         if (ret) {
1963                 put_device(&vdev->vdpa.dev);
1964                 return ret;
1965         }
1966         set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1967         vdev->vdpa.dma_dev = &vdev->vdpa.dev;
1968         vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1969
1970         return 0;
1971 }
1972
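     /*
      * Management-ops callback for "vdpa dev add": the named VDUSE device
      * must already exist and be ready. The IOVA domain is created lazily
      * here, sized from the (possibly tuned) bounce_size, before the
      * device is registered on the vDPA bus.
      */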
1973 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
1974                         const struct vdpa_dev_set_config *config)
1975 {
1976         struct vduse_dev *dev;
1977         int ret;
1978
1979         mutex_lock(&vduse_lock);
1980         dev = vduse_find_dev(name);
1981         if (!dev || !vduse_dev_is_ready(dev)) {
1982                 mutex_unlock(&vduse_lock);
1983                 return -EINVAL;
1984         }
1985         ret = vduse_dev_init_vdpa(dev, name);
1986         mutex_unlock(&vduse_lock);
1987         if (ret)
1988                 return ret;
1989
1990         mutex_lock(&dev->domain_lock);
1991         if (!dev->domain)
1992                 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
1993                                                   dev->bounce_size);
1994         mutex_unlock(&dev->domain_lock);
1995         if (!dev->domain) {
1996                 put_device(&dev->vdev->vdpa.dev);
1997                 return -ENOMEM;
1998         }
1999
2000         ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2001         if (ret) {
2002                 put_device(&dev->vdev->vdpa.dev);
2003                 mutex_lock(&dev->domain_lock);
2004                 vduse_domain_destroy(dev->domain);
2005                 dev->domain = NULL;
2006                 mutex_unlock(&dev->domain_lock);
2007                 return ret;
2008         }
2009
2010         return 0;
2011 }
2012
2013 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2014 {
2015         _vdpa_unregister_device(dev);
2016 }
2017
2018 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2019         .dev_add = vdpa_dev_add,
2020         .dev_del = vdpa_dev_del,
2021 };
2022
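     /* Device types the management device advertises; only virtio-blk for now. */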
2023 static struct virtio_device_id id_table[] = {
2024         { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2025         { 0 },
2026 };
2027
2028 static void vduse_mgmtdev_release(struct device *dev)
2029 {
2030         struct vduse_mgmt_dev *mgmt_dev;
2031
2032         mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2033         kfree(mgmt_dev);
2034 }
2035
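     /*
      * Register the "vduse" management device so VDUSE devices can be
      * attached to the vDPA bus through the vdpa management interface.
      */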
2036 static int vduse_mgmtdev_init(void)
2037 {
2038         int ret;
2039
2040         vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2041         if (!vduse_mgmt)
2042                 return -ENOMEM;
2043
2044         ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2045         if (ret) {
2046                 kfree(vduse_mgmt);
2047                 return ret;
2048         }
2049
2050         vduse_mgmt->dev.release = vduse_mgmtdev_release;
2051
2052         ret = device_register(&vduse_mgmt->dev);
2053         if (ret)
2054                 goto dev_reg_err;
2055
2056         vduse_mgmt->mgmt_dev.id_table = id_table;
2057         vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2058         vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2059         ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2060         if (ret)
2061                 device_unregister(&vduse_mgmt->dev);
2062
2063         return ret;
2064
2065 dev_reg_err:
2066         put_device(&vduse_mgmt->dev);
2067         return ret;
2068 }
2069
2070 static void vduse_mgmtdev_exit(void)
2071 {
2072         vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2073         device_unregister(&vduse_mgmt->dev);
2074 }
2075
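     /*
      * Module init: create the vduse class, the control node and the
      * per-device char devices, the interrupt workqueues, the IOVA domain
      * bookkeeping and the vdpa management device. Error paths unwind in
      * reverse order.
      */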
2076 static int vduse_init(void)
2077 {
2078         int ret;
2079         struct device *dev;
2080
2081         vduse_class = class_create("vduse");
2082         if (IS_ERR(vduse_class))
2083                 return PTR_ERR(vduse_class);
2084
2085         vduse_class->devnode = vduse_devnode;
2086
2087         ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2088         if (ret)
2089                 goto err_chardev_region;
2090
2091         /* /dev/vduse/control */
2092         cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2093         vduse_ctrl_cdev.owner = THIS_MODULE;
2094         ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2095         if (ret)
2096                 goto err_ctrl_cdev;
2097
2098         dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
2099         if (IS_ERR(dev)) {
2100                 ret = PTR_ERR(dev);
2101                 goto err_device;
2102         }
2103
2104         /* /dev/vduse/$DEVICE */
2105         cdev_init(&vduse_cdev, &vduse_dev_fops);
2106         vduse_cdev.owner = THIS_MODULE;
2107         ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2108                        VDUSE_DEV_MAX - 1);
2109         if (ret)
2110                 goto err_cdev;
2111
2112         ret = -ENOMEM;
2113         vduse_irq_wq = alloc_workqueue("vduse-irq",
2114                                 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2115         if (!vduse_irq_wq)
2116                 goto err_wq;
2117
2118         vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
2119         if (!vduse_irq_bound_wq)
2120                 goto err_bound_wq;
2121
2122         ret = vduse_domain_init();
2123         if (ret)
2124                 goto err_domain;
2125
2126         ret = vduse_mgmtdev_init();
2127         if (ret)
2128                 goto err_mgmtdev;
2129
2130         return 0;
2131 err_mgmtdev:
2132         vduse_domain_exit();
2133 err_domain:
2134         destroy_workqueue(vduse_irq_bound_wq);
2135 err_bound_wq:
2136         destroy_workqueue(vduse_irq_wq);
2137 err_wq:
2138         cdev_del(&vduse_cdev);
2139 err_cdev:
2140         device_destroy(vduse_class, vduse_major);
2141 err_device:
2142         cdev_del(&vduse_ctrl_cdev);
2143 err_ctrl_cdev:
2144         unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2145 err_chardev_region:
2146         class_destroy(vduse_class);
2147         return ret;
2148 }
2149 module_init(vduse_init);
2150
2151 static void vduse_exit(void)
2152 {
2153         vduse_mgmtdev_exit();
2154         vduse_domain_exit();
2155         destroy_workqueue(vduse_irq_bound_wq);
2156         destroy_workqueue(vduse_irq_wq);
2157         cdev_del(&vduse_cdev);
2158         device_destroy(vduse_class, vduse_major);
2159         cdev_del(&vduse_ctrl_cdev);
2160         unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2161         class_destroy(vduse_class);
2162 }
2163 module_exit(vduse_exit);
2164
2165 MODULE_LICENSE(DRV_LICENSE);
2166 MODULE_AUTHOR(DRV_AUTHOR);
2167 MODULE_DESCRIPTION(DRV_DESC);