OSDN Git Service

Merge tag 'block-5.6-2020-03-13' of git://git.kernel.dk/linux-block
[tomoyo/tomoyo-test1.git] / drivers / virtio / virtio_balloon.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtio balloon implementation, inspired by Dor Laor and Marcelo
4  * Tosatti's implementations.
5  *
6  *  Copyright 2008 Rusty Russell IBM Corporation
7  */
8
9 #include <linux/virtio.h>
10 #include <linux/virtio_balloon.h>
11 #include <linux/swap.h>
12 #include <linux/workqueue.h>
13 #include <linux/delay.h>
14 #include <linux/slab.h>
15 #include <linux/module.h>
16 #include <linux/balloon_compaction.h>
17 #include <linux/wait.h>
18 #include <linux/mm.h>
19 #include <linux/mount.h>
20 #include <linux/magic.h>
21 #include <linux/pseudo_fs.h>
22
/*
 * Balloon device works in 4K page units.  So each page is pointed to by
 * multiple balloon pages.  All memory counters in this driver are in balloon
 * page units.
 *
 * The expansion is fully parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. as an operand of sizeof).
 */
#define VIRTIO_BALLOON_PAGES_PER_PAGE \
        ((unsigned int)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT))
/* Number of entries in the pfns[] array sent to the host in one request */
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80

/* GFP flags used when allocating free page blocks to hint to the host */
#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
                                             __GFP_NOMEMALLOC)
/* The order of free page blocks to report to host */
#define VIRTIO_BALLOON_HINT_BLOCK_ORDER (MAX_ORDER - 1)
/* The size of a free page block in bytes */
#define VIRTIO_BALLOON_HINT_BLOCK_BYTES \
        (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
/* The number of kernel pages in one free page block */
#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
40
#ifdef CONFIG_BALLOON_COMPACTION
/*
 * Only defined when balloon compaction is configured.
 * NOTE(review): presumably holds the mount of the "balloon-kvm" pseudo
 * filesystem; it is not referenced in the code visible here -- confirm.
 */
static struct vfsmount *balloon_mnt;
#endif
44
/*
 * Indices into the vqs[], callbacks[] and names[] arrays that init_vqs()
 * passes to find_vqs().  VIRTIO_BALLOON_VQ_MAX is the array size, not a
 * queue.
 */
enum virtio_balloon_vq {
        VIRTIO_BALLOON_VQ_INFLATE,
        VIRTIO_BALLOON_VQ_DEFLATE,
        VIRTIO_BALLOON_VQ_STATS,
        VIRTIO_BALLOON_VQ_FREE_PAGE,
        VIRTIO_BALLOON_VQ_MAX
};
52
/* Bit numbers used in virtio_balloon::config_read_bitmap. */
enum virtio_balloon_config_read {
        /* Set when free_page_report_cmd_id must be re-read from config space */
        VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0,
};
56
57 struct virtio_balloon {
58         struct virtio_device *vdev;
59         struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
60
61         /* Balloon's own wq for cpu-intensive work items */
62         struct workqueue_struct *balloon_wq;
63         /* The free page reporting work item submitted to the balloon wq */
64         struct work_struct report_free_page_work;
65
66         /* The balloon servicing is delegated to a freezable workqueue. */
67         struct work_struct update_balloon_stats_work;
68         struct work_struct update_balloon_size_work;
69
70         /* Prevent updating balloon when it is being canceled. */
71         spinlock_t stop_update_lock;
72         bool stop_update;
73         /* Bitmap to indicate if reading the related config fields are needed */
74         unsigned long config_read_bitmap;
75
76         /* The list of allocated free pages, waiting to be given back to mm */
77         struct list_head free_page_list;
78         spinlock_t free_page_list_lock;
79         /* The number of free page blocks on the above list */
80         unsigned long num_free_page_blocks;
81         /*
82          * The cmd id received from host.
83          * Read it via virtio_balloon_cmd_id_received to get the latest value
84          * sent from host.
85          */
86         u32 cmd_id_received_cache;
87         /* The cmd id that is actively in use */
88         __virtio32 cmd_id_active;
89         /* Buffer to store the stop sign */
90         __virtio32 cmd_id_stop;
91
92         /* Waiting for host to ack the pages we released. */
93         wait_queue_head_t acked;
94
95         /* Number of balloon pages we've told the Host we're not using. */
96         unsigned int num_pages;
97         /*
98          * The pages we've told the Host we're not using are enqueued
99          * at vb_dev_info->pages list.
100          * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
101          * to num_pages above.
102          */
103         struct balloon_dev_info vb_dev_info;
104
105         /* Synchronize access/update to this struct virtio_balloon elements */
106         struct mutex balloon_lock;
107
108         /* The array of pfns we tell the Host about. */
109         unsigned int num_pfns;
110         __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
111
112         /* Memory statistics */
113         struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
114
115         /* To register a shrinker to shrink memory upon memory pressure */
116         struct shrinker shrinker;
117 };
118
/* Device IDs handled by this driver; the zeroed entry ends the table. */
static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
        { 0 },
};
123
124 static u32 page_to_balloon_pfn(struct page *page)
125 {
126         unsigned long pfn = page_to_pfn(page);
127
128         BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
129         /* Convert pfn from Linux page size to balloon page size. */
130         return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
131 }
132
133 static void balloon_ack(struct virtqueue *vq)
134 {
135         struct virtio_balloon *vb = vq->vdev->priv;
136
137         wake_up(&vb->acked);
138 }
139
140 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
141 {
142         struct scatterlist sg;
143         unsigned int len;
144
145         sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
146
147         /* We should always be able to add one buffer to an empty queue. */
148         virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
149         virtqueue_kick(vq);
150
151         /* When host has read buffer, this completes via balloon_ack */
152         wait_event(vb->acked, virtqueue_get_buf(vq, &len));
153
154 }
155
156 static void set_page_pfns(struct virtio_balloon *vb,
157                           __virtio32 pfns[], struct page *page)
158 {
159         unsigned int i;
160
161         BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX);
162
163         /*
164          * Set balloon pfns pointing at this page.
165          * Note that the first pfn points at start of the page.
166          */
167         for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
168                 pfns[i] = cpu_to_virtio32(vb->vdev,
169                                           page_to_balloon_pfn(page) + i);
170 }
171
172 static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
173 {
174         unsigned num_allocated_pages;
175         unsigned num_pfns;
176         struct page *page;
177         LIST_HEAD(pages);
178
179         /* We can only do one array worth at a time. */
180         num = min(num, ARRAY_SIZE(vb->pfns));
181
182         for (num_pfns = 0; num_pfns < num;
183              num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
184                 struct page *page = balloon_page_alloc();
185
186                 if (!page) {
187                         dev_info_ratelimited(&vb->vdev->dev,
188                                              "Out of puff! Can't get %u pages\n",
189                                              VIRTIO_BALLOON_PAGES_PER_PAGE);
190                         /* Sleep for at least 1/5 of a second before retry. */
191                         msleep(200);
192                         break;
193                 }
194
195                 balloon_page_push(&pages, page);
196         }
197
198         mutex_lock(&vb->balloon_lock);
199
200         vb->num_pfns = 0;
201
202         while ((page = balloon_page_pop(&pages))) {
203                 balloon_page_enqueue(&vb->vb_dev_info, page);
204
205                 set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
206                 vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
207                 if (!virtio_has_feature(vb->vdev,
208                                         VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
209                         adjust_managed_page_count(page, -1);
210                 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
211         }
212
213         num_allocated_pages = vb->num_pfns;
214         /* Did we get any? */
215         if (vb->num_pfns != 0)
216                 tell_host(vb, vb->inflate_vq);
217         mutex_unlock(&vb->balloon_lock);
218
219         return num_allocated_pages;
220 }
221
222 static void release_pages_balloon(struct virtio_balloon *vb,
223                                  struct list_head *pages)
224 {
225         struct page *page, *next;
226
227         list_for_each_entry_safe(page, next, pages, lru) {
228                 if (!virtio_has_feature(vb->vdev,
229                                         VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
230                         adjust_managed_page_count(page, 1);
231                 list_del(&page->lru);
232                 put_page(page); /* balloon reference */
233         }
234 }
235
236 static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
237 {
238         unsigned num_freed_pages;
239         struct page *page;
240         struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
241         LIST_HEAD(pages);
242
243         /* We can only do one array worth at a time. */
244         num = min(num, ARRAY_SIZE(vb->pfns));
245
246         mutex_lock(&vb->balloon_lock);
247         /* We can't release more pages than taken */
248         num = min(num, (size_t)vb->num_pages);
249         for (vb->num_pfns = 0; vb->num_pfns < num;
250              vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
251                 page = balloon_page_dequeue(vb_dev_info);
252                 if (!page)
253                         break;
254                 set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
255                 list_add(&page->lru, &pages);
256                 vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
257         }
258
259         num_freed_pages = vb->num_pfns;
260         /*
261          * Note that if
262          * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
263          * is true, we *have* to do it in this order
264          */
265         if (vb->num_pfns != 0)
266                 tell_host(vb, vb->deflate_vq);
267         release_pages_balloon(vb, &pages);
268         mutex_unlock(&vb->balloon_lock);
269         return num_freed_pages;
270 }
271
272 static inline void update_stat(struct virtio_balloon *vb, int idx,
273                                u16 tag, u64 val)
274 {
275         BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
276         vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
277         vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
278 }
279
/* Convert a count of kernel pages to bytes; widened to u64 before the shift. */
#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
281
282 static unsigned int update_balloon_stats(struct virtio_balloon *vb)
283 {
284         unsigned long events[NR_VM_EVENT_ITEMS];
285         struct sysinfo i;
286         unsigned int idx = 0;
287         long available;
288         unsigned long caches;
289
290         all_vm_events(events);
291         si_meminfo(&i);
292
293         available = si_mem_available();
294         caches = global_node_page_state(NR_FILE_PAGES);
295
296 #ifdef CONFIG_VM_EVENT_COUNTERS
297         update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
298                                 pages_to_bytes(events[PSWPIN]));
299         update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
300                                 pages_to_bytes(events[PSWPOUT]));
301         update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
302         update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
303 #ifdef CONFIG_HUGETLB_PAGE
304         update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
305                     events[HTLB_BUDDY_PGALLOC]);
306         update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
307                     events[HTLB_BUDDY_PGALLOC_FAIL]);
308 #endif
309 #endif
310         update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
311                                 pages_to_bytes(i.freeram));
312         update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
313                                 pages_to_bytes(i.totalram));
314         update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL,
315                                 pages_to_bytes(available));
316         update_stat(vb, idx++, VIRTIO_BALLOON_S_CACHES,
317                                 pages_to_bytes(caches));
318
319         return idx;
320 }
321
322 /*
323  * While most virtqueues communicate guest-initiated requests to the hypervisor,
324  * the stats queue operates in reverse.  The driver initializes the virtqueue
325  * with a single buffer.  From that point forward, all conversations consist of
326  * a hypervisor request (a call to this function) which directs us to refill
327  * the virtqueue with a fresh stats buffer.  Since stats collection can sleep,
328  * we delegate the job to a freezable workqueue that will do the actual work via
329  * stats_handle_request().
330  */
331 static void stats_request(struct virtqueue *vq)
332 {
333         struct virtio_balloon *vb = vq->vdev->priv;
334
335         spin_lock(&vb->stop_update_lock);
336         if (!vb->stop_update)
337                 queue_work(system_freezable_wq, &vb->update_balloon_stats_work);
338         spin_unlock(&vb->stop_update_lock);
339 }
340
341 static void stats_handle_request(struct virtio_balloon *vb)
342 {
343         struct virtqueue *vq;
344         struct scatterlist sg;
345         unsigned int len, num_stats;
346
347         num_stats = update_balloon_stats(vb);
348
349         vq = vb->stats_vq;
350         if (!virtqueue_get_buf(vq, &len))
351                 return;
352         sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
353         virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
354         virtqueue_kick(vq);
355 }
356
357 static inline s64 towards_target(struct virtio_balloon *vb)
358 {
359         s64 target;
360         u32 num_pages;
361
362         virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
363                      &num_pages);
364
365         /* Legacy balloon config space is LE, unlike all other devices. */
366         if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
367                 num_pages = le32_to_cpu((__force __le32)num_pages);
368
369         target = num_pages;
370         return target - vb->num_pages;
371 }
372
373 /* Gives back @num_to_return blocks of free pages to mm. */
374 static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
375                                              unsigned long num_to_return)
376 {
377         struct page *page;
378         unsigned long num_returned;
379
380         spin_lock_irq(&vb->free_page_list_lock);
381         for (num_returned = 0; num_returned < num_to_return; num_returned++) {
382                 page = balloon_page_pop(&vb->free_page_list);
383                 if (!page)
384                         break;
385                 free_pages((unsigned long)page_address(page),
386                            VIRTIO_BALLOON_HINT_BLOCK_ORDER);
387         }
388         vb->num_free_page_blocks -= num_returned;
389         spin_unlock_irq(&vb->free_page_list_lock);
390
391         return num_returned;
392 }
393
394 static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
395 {
396         if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
397                 return;
398
399         /* No need to queue the work if the bit was already set. */
400         if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
401                              &vb->config_read_bitmap))
402                 return;
403
404         queue_work(vb->balloon_wq, &vb->report_free_page_work);
405 }
406
407 static void virtballoon_changed(struct virtio_device *vdev)
408 {
409         struct virtio_balloon *vb = vdev->priv;
410         unsigned long flags;
411
412         spin_lock_irqsave(&vb->stop_update_lock, flags);
413         if (!vb->stop_update) {
414                 queue_work(system_freezable_wq,
415                            &vb->update_balloon_size_work);
416                 virtio_balloon_queue_free_page_work(vb);
417         }
418         spin_unlock_irqrestore(&vb->stop_update_lock, flags);
419 }
420
421 static void update_balloon_size(struct virtio_balloon *vb)
422 {
423         u32 actual = vb->num_pages;
424
425         /* Legacy balloon config space is LE, unlike all other devices. */
426         if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
427                 actual = (__force u32)cpu_to_le32(actual);
428
429         virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
430                       &actual);
431 }
432
433 static void update_balloon_stats_func(struct work_struct *work)
434 {
435         struct virtio_balloon *vb;
436
437         vb = container_of(work, struct virtio_balloon,
438                           update_balloon_stats_work);
439         stats_handle_request(vb);
440 }
441
442 static void update_balloon_size_func(struct work_struct *work)
443 {
444         struct virtio_balloon *vb;
445         s64 diff;
446
447         vb = container_of(work, struct virtio_balloon,
448                           update_balloon_size_work);
449         diff = towards_target(vb);
450
451         if (!diff)
452                 return;
453
454         if (diff > 0)
455                 diff -= fill_balloon(vb, diff);
456         else
457                 diff += leak_balloon(vb, -diff);
458         update_balloon_size(vb);
459
460         if (diff)
461                 queue_work(system_freezable_wq, work);
462 }
463
464 static int init_vqs(struct virtio_balloon *vb)
465 {
466         struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
467         vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
468         const char *names[VIRTIO_BALLOON_VQ_MAX];
469         int err;
470
471         /*
472          * Inflateq and deflateq are used unconditionally. The names[]
473          * will be NULL if the related feature is not enabled, which will
474          * cause no allocation for the corresponding virtqueue in find_vqs.
475          */
476         callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
477         names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
478         callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
479         names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
480         callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
481         names[VIRTIO_BALLOON_VQ_STATS] = NULL;
482         callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
483         names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
484
485         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
486                 names[VIRTIO_BALLOON_VQ_STATS] = "stats";
487                 callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
488         }
489
490         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
491                 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
492                 callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
493         }
494
495         err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
496                                          vqs, callbacks, names, NULL, NULL);
497         if (err)
498                 return err;
499
500         vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
501         vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
502         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
503                 struct scatterlist sg;
504                 unsigned int num_stats;
505                 vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
506
507                 /*
508                  * Prime this virtqueue with one buffer so the hypervisor can
509                  * use it to signal us later (it can't be broken yet!).
510                  */
511                 num_stats = update_balloon_stats(vb);
512
513                 sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
514                 err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb,
515                                            GFP_KERNEL);
516                 if (err) {
517                         dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
518                                  __func__);
519                         return err;
520                 }
521                 virtqueue_kick(vb->stats_vq);
522         }
523
524         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
525                 vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
526
527         return 0;
528 }
529
530 static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb)
531 {
532         if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
533                                &vb->config_read_bitmap))
534                 virtio_cread(vb->vdev, struct virtio_balloon_config,
535                              free_page_report_cmd_id,
536                              &vb->cmd_id_received_cache);
537
538         return vb->cmd_id_received_cache;
539 }
540
541 static int send_cmd_id_start(struct virtio_balloon *vb)
542 {
543         struct scatterlist sg;
544         struct virtqueue *vq = vb->free_page_vq;
545         int err, unused;
546
547         /* Detach all the used buffers from the vq */
548         while (virtqueue_get_buf(vq, &unused))
549                 ;
550
551         vb->cmd_id_active = virtio32_to_cpu(vb->vdev,
552                                         virtio_balloon_cmd_id_received(vb));
553         sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
554         err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
555         if (!err)
556                 virtqueue_kick(vq);
557         return err;
558 }
559
560 static int send_cmd_id_stop(struct virtio_balloon *vb)
561 {
562         struct scatterlist sg;
563         struct virtqueue *vq = vb->free_page_vq;
564         int err, unused;
565
566         /* Detach all the used buffers from the vq */
567         while (virtqueue_get_buf(vq, &unused))
568                 ;
569
570         sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
571         err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
572         if (!err)
573                 virtqueue_kick(vq);
574         return err;
575 }
576
577 static int get_free_page_and_send(struct virtio_balloon *vb)
578 {
579         struct virtqueue *vq = vb->free_page_vq;
580         struct page *page;
581         struct scatterlist sg;
582         int err, unused;
583         void *p;
584
585         /* Detach all the used buffers from the vq */
586         while (virtqueue_get_buf(vq, &unused))
587                 ;
588
589         page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
590                            VIRTIO_BALLOON_HINT_BLOCK_ORDER);
591         /*
592          * When the allocation returns NULL, it indicates that we have got all
593          * the possible free pages, so return -EINTR to stop.
594          */
595         if (!page)
596                 return -EINTR;
597
598         p = page_address(page);
599         sg_init_one(&sg, p, VIRTIO_BALLOON_HINT_BLOCK_BYTES);
600         /* There is always 1 entry reserved for the cmd id to use. */
601         if (vq->num_free > 1) {
602                 err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
603                 if (unlikely(err)) {
604                         free_pages((unsigned long)p,
605                                    VIRTIO_BALLOON_HINT_BLOCK_ORDER);
606                         return err;
607                 }
608                 virtqueue_kick(vq);
609                 spin_lock_irq(&vb->free_page_list_lock);
610                 balloon_page_push(&vb->free_page_list, page);
611                 vb->num_free_page_blocks++;
612                 spin_unlock_irq(&vb->free_page_list_lock);
613         } else {
614                 /*
615                  * The vq has no available entry to add this page block, so
616                  * just free it.
617                  */
618                 free_pages((unsigned long)p, VIRTIO_BALLOON_HINT_BLOCK_ORDER);
619         }
620
621         return 0;
622 }
623
624 static int send_free_pages(struct virtio_balloon *vb)
625 {
626         int err;
627         u32 cmd_id_active;
628
629         while (1) {
630                 /*
631                  * If a stop id or a new cmd id was just received from host,
632                  * stop the reporting.
633                  */
634                 cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
635                 if (unlikely(cmd_id_active !=
636                              virtio_balloon_cmd_id_received(vb)))
637                         break;
638
639                 /*
640                  * The free page blocks are allocated and sent to host one by
641                  * one.
642                  */
643                 err = get_free_page_and_send(vb);
644                 if (err == -EINTR)
645                         break;
646                 else if (unlikely(err))
647                         return err;
648         }
649
650         return 0;
651 }
652
653 static void virtio_balloon_report_free_page(struct virtio_balloon *vb)
654 {
655         int err;
656         struct device *dev = &vb->vdev->dev;
657
658         /* Start by sending the received cmd id to host with an outbuf. */
659         err = send_cmd_id_start(vb);
660         if (unlikely(err))
661                 dev_err(dev, "Failed to send a start id, err = %d\n", err);
662
663         err = send_free_pages(vb);
664         if (unlikely(err))
665                 dev_err(dev, "Failed to send a free page, err = %d\n", err);
666
667         /* End by sending a stop id to host with an outbuf. */
668         err = send_cmd_id_stop(vb);
669         if (unlikely(err))
670                 dev_err(dev, "Failed to send a stop id, err = %d\n", err);
671 }
672
673 static void report_free_page_func(struct work_struct *work)
674 {
675         struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
676                                                  report_free_page_work);
677         u32 cmd_id_received;
678
679         cmd_id_received = virtio_balloon_cmd_id_received(vb);
680         if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
681                 /* Pass ULONG_MAX to give back all the free pages */
682                 return_free_pages_to_mm(vb, ULONG_MAX);
683         } else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
684                    cmd_id_received !=
685                    virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) {
686                 virtio_balloon_report_free_page(vb);
687         }
688 }
689
690 #ifdef CONFIG_BALLOON_COMPACTION
/*
 * virtballoon_migratepage - perform the balloon page migration on behalf of
 *                           a compaction thread.     (called under page lock)
 * @vb_dev_info: the balloon device
 * @newpage: page that will replace the isolated page after migration finishes.
 * @page   : the isolated (old) page that is about to be migrated to newpage.
 * @mode   : compaction mode -- not used for balloon page migration.
 *
 * After a ballooned page gets isolated by compaction procedures, this is the
 * function that performs the page migration on behalf of a compaction thread.
 * The page migration for virtio balloon is done in a simple swap fashion which
 * follows these two macro steps:
 *  1) insert newpage into vb->pages list and update the host about it;
 *  2) update the host about the old page removed from vb->pages list;
 *
 * This function performs the balloon page migration task.
 * Called through balloon_mapping->a_ops->migratepage
 *
 * Returns MIGRATEPAGE_SUCCESS, or -EAGAIN when balloon_lock is contended.
 */
static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
                struct page *newpage, struct page *page, enum migrate_mode mode)
{
        struct virtio_balloon *vb = container_of(vb_dev_info,
                        struct virtio_balloon, vb_dev_info);
        unsigned long flags;

        /*
         * In order to avoid lock contention while migrating pages concurrently
         * to leak_balloon() or fill_balloon() we just give up the balloon_lock
         * this turn, as it is easier to retry the page migration later.
         * This also prevents fill_balloon() getting stuck into a mutex
         * recursion in the case it ends up triggering memory compaction
         * while it is attempting to inflate the balloon.
         */
        if (!mutex_trylock(&vb->balloon_lock))
                return -EAGAIN;

        get_page(newpage); /* balloon reference */

        /*
         * When we migrate a page to a different zone and adjusted the
         * managed page count when inflating, we have to fixup the count of
         * both involved zones.
         */
        if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) &&
            page_zone(page) != page_zone(newpage)) {
                adjust_managed_page_count(page, 1);
                adjust_managed_page_count(newpage, -1);
        }

        /* balloon's page migration 1st step  -- inflate "newpage" */
        spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
        balloon_page_insert(vb_dev_info, newpage);
        vb_dev_info->isolated_pages--;
        __count_vm_event(BALLOON_MIGRATE);
        spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
        /* Report exactly one kernel page worth of balloon pfns to the host. */
        vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
        set_page_pfns(vb, vb->pfns, newpage);
        tell_host(vb, vb->inflate_vq);

        /* balloon's page migration 2nd step -- deflate "page" */
        spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
        balloon_page_delete(page);
        spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
        vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
        set_page_pfns(vb, vb->pfns, page);
        tell_host(vb, vb->deflate_vq);

        mutex_unlock(&vb->balloon_lock);

        put_page(page); /* balloon reference */

        return MIGRATEPAGE_SUCCESS;
}
764
765 static int balloon_init_fs_context(struct fs_context *fc)
766 {
767         return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
768 }
769
/* The "balloon-kvm" filesystem type; contexts come from balloon_init_fs_context(). */
static struct file_system_type balloon_fs = {
        .name           = "balloon-kvm",
        .init_fs_context = balloon_init_fs_context,
        .kill_sb        = kill_anon_super,
};
775
776 #endif /* CONFIG_BALLOON_COMPACTION */
777
778 static unsigned long shrink_free_pages(struct virtio_balloon *vb,
779                                        unsigned long pages_to_free)
780 {
781         unsigned long blocks_to_free, blocks_freed;
782
783         pages_to_free = round_up(pages_to_free,
784                                  VIRTIO_BALLOON_HINT_BLOCK_PAGES);
785         blocks_to_free = pages_to_free / VIRTIO_BALLOON_HINT_BLOCK_PAGES;
786         blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
787
788         return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
789 }
790
791 static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
792                                           unsigned long pages_to_free)
793 {
794         return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) /
795                 VIRTIO_BALLOON_PAGES_PER_PAGE;
796 }
797
798 static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
799                                           unsigned long pages_to_free)
800 {
801         unsigned long pages_freed = 0;
802
803         /*
804          * One invocation of leak_balloon can deflate at most
805          * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
806          * multiple times to deflate pages till reaching pages_to_free.
807          */
808         while (vb->num_pages && pages_freed < pages_to_free)
809                 pages_freed += leak_balloon_pages(vb,
810                                                   pages_to_free - pages_freed);
811
812         update_balloon_size(vb);
813
814         return pages_freed;
815 }
816
817 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
818                                                   struct shrink_control *sc)
819 {
820         unsigned long pages_to_free, pages_freed = 0;
821         struct virtio_balloon *vb = container_of(shrinker,
822                                         struct virtio_balloon, shrinker);
823
824         pages_to_free = sc->nr_to_scan;
825
826         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
827                 pages_freed = shrink_free_pages(vb, pages_to_free);
828
829         if (pages_freed >= pages_to_free)
830                 return pages_freed;
831
832         pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
833
834         return pages_freed;
835 }
836
837 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
838                                                    struct shrink_control *sc)
839 {
840         struct virtio_balloon *vb = container_of(shrinker,
841                                         struct virtio_balloon, shrinker);
842         unsigned long count;
843
844         count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
845         count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
846
847         return count;
848 }
849
850 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
851 {
852         unregister_shrinker(&vb->shrinker);
853 }
854
855 static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
856 {
857         vb->shrinker.scan_objects = virtio_balloon_shrinker_scan;
858         vb->shrinker.count_objects = virtio_balloon_shrinker_count;
859         vb->shrinker.seeks = DEFAULT_SEEKS;
860
861         return register_shrinker(&vb->shrinker);
862 }
863
864 static int virtballoon_probe(struct virtio_device *vdev)
865 {
866         struct virtio_balloon *vb;
867         __u32 poison_val;
868         int err;
869
870         if (!vdev->config->get) {
871                 dev_err(&vdev->dev, "%s failure: config access disabled\n",
872                         __func__);
873                 return -EINVAL;
874         }
875
876         vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
877         if (!vb) {
878                 err = -ENOMEM;
879                 goto out;
880         }
881
882         INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
883         INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
884         spin_lock_init(&vb->stop_update_lock);
885         mutex_init(&vb->balloon_lock);
886         init_waitqueue_head(&vb->acked);
887         vb->vdev = vdev;
888
889         balloon_devinfo_init(&vb->vb_dev_info);
890
891         err = init_vqs(vb);
892         if (err)
893                 goto out_free_vb;
894
895 #ifdef CONFIG_BALLOON_COMPACTION
896         balloon_mnt = kern_mount(&balloon_fs);
897         if (IS_ERR(balloon_mnt)) {
898                 err = PTR_ERR(balloon_mnt);
899                 goto out_del_vqs;
900         }
901
902         vb->vb_dev_info.migratepage = virtballoon_migratepage;
903         vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
904         if (IS_ERR(vb->vb_dev_info.inode)) {
905                 err = PTR_ERR(vb->vb_dev_info.inode);
906                 goto out_kern_unmount;
907         }
908         vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
909 #endif
910         if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
911                 /*
912                  * There is always one entry reserved for cmd id, so the ring
913                  * size needs to be at least two to report free page hints.
914                  */
915                 if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
916                         err = -ENOSPC;
917                         goto out_iput;
918                 }
919                 vb->balloon_wq = alloc_workqueue("balloon-wq",
920                                         WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
921                 if (!vb->balloon_wq) {
922                         err = -ENOMEM;
923                         goto out_iput;
924                 }
925                 INIT_WORK(&vb->report_free_page_work, report_free_page_func);
926                 vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
927                 vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
928                                                   VIRTIO_BALLOON_CMD_ID_STOP);
929                 vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
930                                                   VIRTIO_BALLOON_CMD_ID_STOP);
931                 spin_lock_init(&vb->free_page_list_lock);
932                 INIT_LIST_HEAD(&vb->free_page_list);
933                 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
934                         memset(&poison_val, PAGE_POISON, sizeof(poison_val));
935                         virtio_cwrite(vb->vdev, struct virtio_balloon_config,
936                                       poison_val, &poison_val);
937                 }
938         }
939         /*
940          * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
941          * shrinker needs to be registered to relieve memory pressure.
942          */
943         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
944                 err = virtio_balloon_register_shrinker(vb);
945                 if (err)
946                         goto out_del_balloon_wq;
947         }
948         virtio_device_ready(vdev);
949
950         if (towards_target(vb))
951                 virtballoon_changed(vdev);
952         return 0;
953
954 out_del_balloon_wq:
955         if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
956                 destroy_workqueue(vb->balloon_wq);
957 out_iput:
958 #ifdef CONFIG_BALLOON_COMPACTION
959         iput(vb->vb_dev_info.inode);
960 out_kern_unmount:
961         kern_unmount(balloon_mnt);
962 out_del_vqs:
963 #endif
964         vdev->config->del_vqs(vdev);
965 out_free_vb:
966         kfree(vb);
967 out:
968         return err;
969 }
970
971 static void remove_common(struct virtio_balloon *vb)
972 {
973         /* There might be pages left in the balloon: free them. */
974         while (vb->num_pages)
975                 leak_balloon(vb, vb->num_pages);
976         update_balloon_size(vb);
977
978         /* There might be free pages that are being reported: release them. */
979         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
980                 return_free_pages_to_mm(vb, ULONG_MAX);
981
982         /* Now we reset the device so we can clean up the queues. */
983         vb->vdev->config->reset(vb->vdev);
984
985         vb->vdev->config->del_vqs(vb->vdev);
986 }
987
988 static void virtballoon_remove(struct virtio_device *vdev)
989 {
990         struct virtio_balloon *vb = vdev->priv;
991
992         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
993                 virtio_balloon_unregister_shrinker(vb);
994         spin_lock_irq(&vb->stop_update_lock);
995         vb->stop_update = true;
996         spin_unlock_irq(&vb->stop_update_lock);
997         cancel_work_sync(&vb->update_balloon_size_work);
998         cancel_work_sync(&vb->update_balloon_stats_work);
999
1000         if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
1001                 cancel_work_sync(&vb->report_free_page_work);
1002                 destroy_workqueue(vb->balloon_wq);
1003         }
1004
1005         remove_common(vb);
1006 #ifdef CONFIG_BALLOON_COMPACTION
1007         if (vb->vb_dev_info.inode)
1008                 iput(vb->vb_dev_info.inode);
1009
1010         kern_unmount(balloon_mnt);
1011 #endif
1012         kfree(vb);
1013 }
1014
1015 #ifdef CONFIG_PM_SLEEP
1016 static int virtballoon_freeze(struct virtio_device *vdev)
1017 {
1018         struct virtio_balloon *vb = vdev->priv;
1019
1020         /*
1021          * The workqueue is already frozen by the PM core before this
1022          * function is called.
1023          */
1024         remove_common(vb);
1025         return 0;
1026 }
1027
1028 static int virtballoon_restore(struct virtio_device *vdev)
1029 {
1030         struct virtio_balloon *vb = vdev->priv;
1031         int ret;
1032
1033         ret = init_vqs(vdev->priv);
1034         if (ret)
1035                 return ret;
1036
1037         virtio_device_ready(vdev);
1038
1039         if (towards_target(vb))
1040                 virtballoon_changed(vdev);
1041         update_balloon_size(vb);
1042         return 0;
1043 }
1044 #endif
1045
1046 static int virtballoon_validate(struct virtio_device *vdev)
1047 {
1048         if (!page_poisoning_enabled())
1049                 __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
1050
1051         __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
1052         return 0;
1053 }
1054
1055 static unsigned int features[] = {
1056         VIRTIO_BALLOON_F_MUST_TELL_HOST,
1057         VIRTIO_BALLOON_F_STATS_VQ,
1058         VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
1059         VIRTIO_BALLOON_F_FREE_PAGE_HINT,
1060         VIRTIO_BALLOON_F_PAGE_POISON,
1061 };
1062
1063 static struct virtio_driver virtio_balloon_driver = {
1064         .feature_table = features,
1065         .feature_table_size = ARRAY_SIZE(features),
1066         .driver.name =  KBUILD_MODNAME,
1067         .driver.owner = THIS_MODULE,
1068         .id_table =     id_table,
1069         .validate =     virtballoon_validate,
1070         .probe =        virtballoon_probe,
1071         .remove =       virtballoon_remove,
1072         .config_changed = virtballoon_changed,
1073 #ifdef CONFIG_PM_SLEEP
1074         .freeze =       virtballoon_freeze,
1075         .restore =      virtballoon_restore,
1076 #endif
1077 };
1078
1079 module_virtio_driver(virtio_balloon_driver);
1080 MODULE_DEVICE_TABLE(virtio, id_table);
1081 MODULE_DESCRIPTION("Virtio balloon driver");
1082 MODULE_LICENSE("GPL");