OSDN Git Service

Merge 4.4.154 into android-4.4
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / fs / fuse / dev.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9 #include "fuse_i.h"
10
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/namei.h>
17 #include <linux/pagemap.h>
18 #include <linux/file.h>
19 #include <linux/slab.h>
20 #include <linux/pipe_fs_i.h>
21 #include <linux/swap.h>
22 #include <linux/splice.h>
23 #include <linux/freezer.h>
24
25 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
26 MODULE_ALIAS("devname:fuse");
27
28 static struct kmem_cache *fuse_req_cachep;
29
30 static struct fuse_dev *fuse_get_dev(struct file *file)
31 {
32         /*
33          * Lockless access is OK, because file->private data is set
34          * once during mount and is valid until the file is released.
35          */
36         return ACCESS_ONCE(file->private_data);
37 }
38
39 static void fuse_request_init(struct fuse_req *req, struct page **pages,
40                               struct fuse_page_desc *page_descs,
41                               unsigned npages)
42 {
43         memset(req, 0, sizeof(*req));
44         memset(pages, 0, sizeof(*pages) * npages);
45         memset(page_descs, 0, sizeof(*page_descs) * npages);
46         INIT_LIST_HEAD(&req->list);
47         INIT_LIST_HEAD(&req->intr_entry);
48         init_waitqueue_head(&req->waitq);
49         atomic_set(&req->count, 1);
50         req->pages = pages;
51         req->page_descs = page_descs;
52         req->max_pages = npages;
53         __set_bit(FR_PENDING, &req->flags);
54 }
55
56 static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
57 {
58         struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
59         if (req) {
60                 struct page **pages;
61                 struct fuse_page_desc *page_descs;
62
63                 if (npages <= FUSE_REQ_INLINE_PAGES) {
64                         pages = req->inline_pages;
65                         page_descs = req->inline_page_descs;
66                 } else {
67                         pages = kmalloc(sizeof(struct page *) * npages, flags);
68                         page_descs = kmalloc(sizeof(struct fuse_page_desc) *
69                                              npages, flags);
70                 }
71
72                 if (!pages || !page_descs) {
73                         kfree(pages);
74                         kfree(page_descs);
75                         kmem_cache_free(fuse_req_cachep, req);
76                         return NULL;
77                 }
78
79                 fuse_request_init(req, pages, page_descs, npages);
80         }
81         return req;
82 }
83
84 struct fuse_req *fuse_request_alloc(unsigned npages)
85 {
86         return __fuse_request_alloc(npages, GFP_KERNEL);
87 }
88 EXPORT_SYMBOL_GPL(fuse_request_alloc);
89
90 struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
91 {
92         return __fuse_request_alloc(npages, GFP_NOFS);
93 }
94
95 void fuse_request_free(struct fuse_req *req)
96 {
97         if (req->pages != req->inline_pages) {
98                 kfree(req->pages);
99                 kfree(req->page_descs);
100         }
101         kmem_cache_free(fuse_req_cachep, req);
102 }
103
104 static void block_sigs(sigset_t *oldset)
105 {
106         sigset_t mask;
107
108         siginitsetinv(&mask, sigmask(SIGKILL));
109         sigprocmask(SIG_BLOCK, &mask, oldset);
110 }
111
112 static void restore_sigs(sigset_t *oldset)
113 {
114         sigprocmask(SIG_SETMASK, oldset, NULL);
115 }
116
117 void __fuse_get_request(struct fuse_req *req)
118 {
119         atomic_inc(&req->count);
120 }
121
122 /* Must be called with > 1 refcount */
123 static void __fuse_put_request(struct fuse_req *req)
124 {
125         BUG_ON(atomic_read(&req->count) < 2);
126         atomic_dec(&req->count);
127 }
128
129 static void fuse_req_init_context(struct fuse_req *req)
130 {
131         req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
132         req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
133         req->in.h.pid = current->pid;
134 }
135
136 void fuse_set_initialized(struct fuse_conn *fc)
137 {
138         /* Make sure stores before this are seen on another CPU */
139         smp_wmb();
140         fc->initialized = 1;
141 }
142
143 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
144 {
145         return !fc->initialized || (for_background && fc->blocked);
146 }
147
148 static void fuse_drop_waiting(struct fuse_conn *fc)
149 {
150         if (fc->connected) {
151                 atomic_dec(&fc->num_waiting);
152         } else if (atomic_dec_and_test(&fc->num_waiting)) {
153                 /* wake up aborters */
154                 wake_up_all(&fc->blocked_waitq);
155         }
156 }
157
158 static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
159                                        bool for_background)
160 {
161         struct fuse_req *req;
162         int err;
163         atomic_inc(&fc->num_waiting);
164
165         if (fuse_block_alloc(fc, for_background)) {
166                 sigset_t oldset;
167                 int intr;
168
169                 block_sigs(&oldset);
170                 intr = wait_event_interruptible_exclusive(fc->blocked_waitq,
171                                 !fuse_block_alloc(fc, for_background));
172                 restore_sigs(&oldset);
173                 err = -EINTR;
174                 if (intr)
175                         goto out;
176         }
177         /* Matches smp_wmb() in fuse_set_initialized() */
178         smp_rmb();
179
180         err = -ENOTCONN;
181         if (!fc->connected)
182                 goto out;
183
184         err = -ECONNREFUSED;
185         if (fc->conn_error)
186                 goto out;
187
188         req = fuse_request_alloc(npages);
189         err = -ENOMEM;
190         if (!req) {
191                 if (for_background)
192                         wake_up(&fc->blocked_waitq);
193                 goto out;
194         }
195
196         fuse_req_init_context(req);
197         __set_bit(FR_WAITING, &req->flags);
198         if (for_background)
199                 __set_bit(FR_BACKGROUND, &req->flags);
200
201         return req;
202
203  out:
204         fuse_drop_waiting(fc);
205         return ERR_PTR(err);
206 }
207
208 struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
209 {
210         return __fuse_get_req(fc, npages, false);
211 }
212 EXPORT_SYMBOL_GPL(fuse_get_req);
213
214 struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
215                                              unsigned npages)
216 {
217         return __fuse_get_req(fc, npages, true);
218 }
219 EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
220
221 /*
222  * Return request in fuse_file->reserved_req.  However that may
223  * currently be in use.  If that is the case, wait for it to become
224  * available.
225  */
226 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
227                                          struct file *file)
228 {
229         struct fuse_req *req = NULL;
230         struct fuse_file *ff = file->private_data;
231
232         do {
233                 wait_event(fc->reserved_req_waitq, ff->reserved_req);
234                 spin_lock(&fc->lock);
235                 if (ff->reserved_req) {
236                         req = ff->reserved_req;
237                         ff->reserved_req = NULL;
238                         req->stolen_file = get_file(file);
239                 }
240                 spin_unlock(&fc->lock);
241         } while (!req);
242
243         return req;
244 }
245
246 /*
247  * Put stolen request back into fuse_file->reserved_req
248  */
249 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
250 {
251         struct file *file = req->stolen_file;
252         struct fuse_file *ff = file->private_data;
253
254         spin_lock(&fc->lock);
255         fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
256         BUG_ON(ff->reserved_req);
257         ff->reserved_req = req;
258         wake_up_all(&fc->reserved_req_waitq);
259         spin_unlock(&fc->lock);
260         fput(file);
261 }
262
263 /*
264  * Gets a requests for a file operation, always succeeds
265  *
266  * This is used for sending the FLUSH request, which must get to
267  * userspace, due to POSIX locks which may need to be unlocked.
268  *
269  * If allocation fails due to OOM, use the reserved request in
270  * fuse_file.
271  *
272  * This is very unlikely to deadlock accidentally, since the
273  * filesystem should not have it's own file open.  If deadlock is
274  * intentional, it can still be broken by "aborting" the filesystem.
275  */
276 struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
277                                              struct file *file)
278 {
279         struct fuse_req *req;
280
281         atomic_inc(&fc->num_waiting);
282         wait_event(fc->blocked_waitq, fc->initialized);
283         /* Matches smp_wmb() in fuse_set_initialized() */
284         smp_rmb();
285         req = fuse_request_alloc(0);
286         if (!req)
287                 req = get_reserved_req(fc, file);
288
289         fuse_req_init_context(req);
290         __set_bit(FR_WAITING, &req->flags);
291         __clear_bit(FR_BACKGROUND, &req->flags);
292         return req;
293 }
294
295 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
296 {
297         if (atomic_dec_and_test(&req->count)) {
298                 if (test_bit(FR_BACKGROUND, &req->flags)) {
299                         /*
300                          * We get here in the unlikely case that a background
301                          * request was allocated but not sent
302                          */
303                         spin_lock(&fc->lock);
304                         if (!fc->blocked)
305                                 wake_up(&fc->blocked_waitq);
306                         spin_unlock(&fc->lock);
307                 }
308
309                 if (test_bit(FR_WAITING, &req->flags)) {
310                         __clear_bit(FR_WAITING, &req->flags);
311                         fuse_drop_waiting(fc);
312                 }
313
314                 if (req->stolen_file)
315                         put_reserved_req(fc, req);
316                 else
317                         fuse_request_free(req);
318         }
319 }
320 EXPORT_SYMBOL_GPL(fuse_put_request);
321
322 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
323 {
324         unsigned nbytes = 0;
325         unsigned i;
326
327         for (i = 0; i < numargs; i++)
328                 nbytes += args[i].size;
329
330         return nbytes;
331 }
332
333 static u64 fuse_get_unique(struct fuse_iqueue *fiq)
334 {
335         return ++fiq->reqctr;
336 }
337
338 static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
339 {
340         req->in.h.len = sizeof(struct fuse_in_header) +
341                 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
342         list_add_tail(&req->list, &fiq->pending);
343         wake_up_locked(&fiq->waitq);
344         kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
345 }
346
347 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
348                        u64 nodeid, u64 nlookup)
349 {
350         struct fuse_iqueue *fiq = &fc->iq;
351
352         forget->forget_one.nodeid = nodeid;
353         forget->forget_one.nlookup = nlookup;
354
355         spin_lock(&fiq->waitq.lock);
356         if (fiq->connected) {
357                 fiq->forget_list_tail->next = forget;
358                 fiq->forget_list_tail = forget;
359                 wake_up_locked(&fiq->waitq);
360                 kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
361         } else {
362                 kfree(forget);
363         }
364         spin_unlock(&fiq->waitq.lock);
365 }
366
367 static void flush_bg_queue(struct fuse_conn *fc)
368 {
369         while (fc->active_background < fc->max_background &&
370                !list_empty(&fc->bg_queue)) {
371                 struct fuse_req *req;
372                 struct fuse_iqueue *fiq = &fc->iq;
373
374                 req = list_entry(fc->bg_queue.next, struct fuse_req, list);
375                 list_del(&req->list);
376                 fc->active_background++;
377                 spin_lock(&fiq->waitq.lock);
378                 req->in.h.unique = fuse_get_unique(fiq);
379                 queue_request(fiq, req);
380                 spin_unlock(&fiq->waitq.lock);
381         }
382 }
383
384 /*
385  * This function is called when a request is finished.  Either a reply
386  * has arrived or it was aborted (and not yet sent) or some error
387  * occurred during communication with userspace, or the device file
388  * was closed.  The requester thread is woken up (if still waiting),
389  * the 'end' callback is called if given, else the reference to the
390  * request is released
391  */
392 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
393 {
394         struct fuse_iqueue *fiq = &fc->iq;
395
396         if (test_and_set_bit(FR_FINISHED, &req->flags))
397                 goto put_request;
398
399         spin_lock(&fiq->waitq.lock);
400         list_del_init(&req->intr_entry);
401         spin_unlock(&fiq->waitq.lock);
402         WARN_ON(test_bit(FR_PENDING, &req->flags));
403         WARN_ON(test_bit(FR_SENT, &req->flags));
404         if (test_bit(FR_BACKGROUND, &req->flags)) {
405                 spin_lock(&fc->lock);
406                 clear_bit(FR_BACKGROUND, &req->flags);
407                 if (fc->num_background == fc->max_background)
408                         fc->blocked = 0;
409
410                 /* Wake up next waiter, if any */
411                 if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
412                         wake_up(&fc->blocked_waitq);
413
414                 if (fc->num_background == fc->congestion_threshold &&
415                     fc->connected && fc->bdi_initialized) {
416                         clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
417                         clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
418                 }
419                 fc->num_background--;
420                 fc->active_background--;
421                 flush_bg_queue(fc);
422                 spin_unlock(&fc->lock);
423         }
424         wake_up(&req->waitq);
425         if (req->end)
426                 req->end(fc, req);
427 put_request:
428         fuse_put_request(fc, req);
429 }
430
431 static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
432 {
433         spin_lock(&fiq->waitq.lock);
434         if (test_bit(FR_FINISHED, &req->flags)) {
435                 spin_unlock(&fiq->waitq.lock);
436                 return;
437         }
438         if (list_empty(&req->intr_entry)) {
439                 list_add_tail(&req->intr_entry, &fiq->interrupts);
440                 wake_up_locked(&fiq->waitq);
441         }
442         spin_unlock(&fiq->waitq.lock);
443         kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
444 }
445
446 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
447 {
448         struct fuse_iqueue *fiq = &fc->iq;
449         int err;
450
451         if (!fc->no_interrupt) {
452                 /* Any signal may interrupt this */
453                 err = wait_event_interruptible(req->waitq,
454                                         test_bit(FR_FINISHED, &req->flags));
455                 if (!err)
456                         return;
457
458                 set_bit(FR_INTERRUPTED, &req->flags);
459                 /* matches barrier in fuse_dev_do_read() */
460                 smp_mb__after_atomic();
461                 if (test_bit(FR_SENT, &req->flags))
462                         queue_interrupt(fiq, req);
463         }
464
465         if (!test_bit(FR_FORCE, &req->flags)) {
466                 sigset_t oldset;
467
468                 /* Only fatal signals may interrupt this */
469                 block_sigs(&oldset);
470                 err = wait_event_interruptible(req->waitq,
471                                         test_bit(FR_FINISHED, &req->flags));
472                 restore_sigs(&oldset);
473
474                 if (!err)
475                         return;
476
477                 spin_lock(&fiq->waitq.lock);
478                 /* Request is not yet in userspace, bail out */
479                 if (test_bit(FR_PENDING, &req->flags)) {
480                         list_del(&req->list);
481                         spin_unlock(&fiq->waitq.lock);
482                         __fuse_put_request(req);
483                         req->out.h.error = -EINTR;
484                         return;
485                 }
486                 spin_unlock(&fiq->waitq.lock);
487         }
488
489         /*
490          * Either request is already in userspace, or it was forced.
491          * Wait it out.
492          */
493         while (!test_bit(FR_FINISHED, &req->flags))
494                 wait_event_freezable(req->waitq,
495                                 test_bit(FR_FINISHED, &req->flags));
496 }
497
498 static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
499 {
500         struct fuse_iqueue *fiq = &fc->iq;
501
502         BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
503         spin_lock(&fiq->waitq.lock);
504         if (!fiq->connected) {
505                 spin_unlock(&fiq->waitq.lock);
506                 req->out.h.error = -ENOTCONN;
507         } else {
508                 req->in.h.unique = fuse_get_unique(fiq);
509                 queue_request(fiq, req);
510                 /* acquire extra reference, since request is still needed
511                    after request_end() */
512                 __fuse_get_request(req);
513                 spin_unlock(&fiq->waitq.lock);
514
515                 request_wait_answer(fc, req);
516                 /* Pairs with smp_wmb() in request_end() */
517                 smp_rmb();
518         }
519 }
520
521 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
522 {
523         __set_bit(FR_ISREPLY, &req->flags);
524         if (!test_bit(FR_WAITING, &req->flags)) {
525                 __set_bit(FR_WAITING, &req->flags);
526                 atomic_inc(&fc->num_waiting);
527         }
528         __fuse_request_send(fc, req);
529 }
530 EXPORT_SYMBOL_GPL(fuse_request_send);
531
532 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
533 {
534         if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
535                 args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
536
537         if (fc->minor < 9) {
538                 switch (args->in.h.opcode) {
539                 case FUSE_LOOKUP:
540                 case FUSE_CREATE:
541                 case FUSE_MKNOD:
542                 case FUSE_MKDIR:
543                 case FUSE_SYMLINK:
544                 case FUSE_LINK:
545                         args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
546                         break;
547                 case FUSE_GETATTR:
548                 case FUSE_SETATTR:
549                         args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
550                         break;
551                 }
552         }
553         if (fc->minor < 12) {
554                 switch (args->in.h.opcode) {
555                 case FUSE_CREATE:
556                         args->in.args[0].size = sizeof(struct fuse_open_in);
557                         break;
558                 case FUSE_MKNOD:
559                         args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
560                         break;
561                 }
562         }
563 }
564
565 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
566 {
567         struct fuse_req *req;
568         ssize_t ret;
569
570         req = fuse_get_req(fc, 0);
571         if (IS_ERR(req))
572                 return PTR_ERR(req);
573
574         /* Needs to be done after fuse_get_req() so that fc->minor is valid */
575         fuse_adjust_compat(fc, args);
576
577         req->in.h.opcode = args->in.h.opcode;
578         req->in.h.nodeid = args->in.h.nodeid;
579         req->in.numargs = args->in.numargs;
580         memcpy(req->in.args, args->in.args,
581                args->in.numargs * sizeof(struct fuse_in_arg));
582         req->out.argvar = args->out.argvar;
583         req->out.numargs = args->out.numargs;
584         memcpy(req->out.args, args->out.args,
585                args->out.numargs * sizeof(struct fuse_arg));
586         fuse_request_send(fc, req);
587         ret = req->out.h.error;
588         if (!ret && args->out.argvar) {
589                 BUG_ON(args->out.numargs != 1);
590                 ret = req->out.args[0].size;
591         }
592         fuse_put_request(fc, req);
593
594         return ret;
595 }
596
597 /*
598  * Called under fc->lock
599  *
600  * fc->connected must have been checked previously
601  */
602 void fuse_request_send_background_locked(struct fuse_conn *fc,
603                                          struct fuse_req *req)
604 {
605         BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
606         if (!test_bit(FR_WAITING, &req->flags)) {
607                 __set_bit(FR_WAITING, &req->flags);
608                 atomic_inc(&fc->num_waiting);
609         }
610         __set_bit(FR_ISREPLY, &req->flags);
611         fc->num_background++;
612         if (fc->num_background == fc->max_background)
613                 fc->blocked = 1;
614         if (fc->num_background == fc->congestion_threshold &&
615             fc->bdi_initialized) {
616                 set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
617                 set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
618         }
619         list_add_tail(&req->list, &fc->bg_queue);
620         flush_bg_queue(fc);
621 }
622
623 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
624 {
625         BUG_ON(!req->end);
626         spin_lock(&fc->lock);
627         if (fc->connected) {
628                 fuse_request_send_background_locked(fc, req);
629                 spin_unlock(&fc->lock);
630         } else {
631                 spin_unlock(&fc->lock);
632                 req->out.h.error = -ENOTCONN;
633                 req->end(fc, req);
634                 fuse_put_request(fc, req);
635         }
636 }
637 EXPORT_SYMBOL_GPL(fuse_request_send_background);
638
639 static int fuse_request_send_notify_reply(struct fuse_conn *fc,
640                                           struct fuse_req *req, u64 unique)
641 {
642         int err = -ENODEV;
643         struct fuse_iqueue *fiq = &fc->iq;
644
645         __clear_bit(FR_ISREPLY, &req->flags);
646         req->in.h.unique = unique;
647         spin_lock(&fiq->waitq.lock);
648         if (fiq->connected) {
649                 queue_request(fiq, req);
650                 err = 0;
651         }
652         spin_unlock(&fiq->waitq.lock);
653
654         return err;
655 }
656
657 void fuse_force_forget(struct file *file, u64 nodeid)
658 {
659         struct inode *inode = file_inode(file);
660         struct fuse_conn *fc = get_fuse_conn(inode);
661         struct fuse_req *req;
662         struct fuse_forget_in inarg;
663
664         memset(&inarg, 0, sizeof(inarg));
665         inarg.nlookup = 1;
666         req = fuse_get_req_nofail_nopages(fc, file);
667         req->in.h.opcode = FUSE_FORGET;
668         req->in.h.nodeid = nodeid;
669         req->in.numargs = 1;
670         req->in.args[0].size = sizeof(inarg);
671         req->in.args[0].value = &inarg;
672         __clear_bit(FR_ISREPLY, &req->flags);
673         __fuse_request_send(fc, req);
674         /* ignore errors */
675         fuse_put_request(fc, req);
676 }
677
678 /*
679  * Lock the request.  Up to the next unlock_request() there mustn't be
680  * anything that could cause a page-fault.  If the request was already
681  * aborted bail out.
682  */
683 static int lock_request(struct fuse_req *req)
684 {
685         int err = 0;
686         if (req) {
687                 spin_lock(&req->waitq.lock);
688                 if (test_bit(FR_ABORTED, &req->flags))
689                         err = -ENOENT;
690                 else
691                         set_bit(FR_LOCKED, &req->flags);
692                 spin_unlock(&req->waitq.lock);
693         }
694         return err;
695 }
696
697 /*
698  * Unlock request.  If it was aborted while locked, caller is responsible
699  * for unlocking and ending the request.
700  */
701 static int unlock_request(struct fuse_req *req)
702 {
703         int err = 0;
704         if (req) {
705                 spin_lock(&req->waitq.lock);
706                 if (test_bit(FR_ABORTED, &req->flags))
707                         err = -ENOENT;
708                 else
709                         clear_bit(FR_LOCKED, &req->flags);
710                 spin_unlock(&req->waitq.lock);
711         }
712         return err;
713 }
714
715 struct fuse_copy_state {
716         int write;
717         struct fuse_req *req;
718         struct iov_iter *iter;
719         struct pipe_buffer *pipebufs;
720         struct pipe_buffer *currbuf;
721         struct pipe_inode_info *pipe;
722         unsigned long nr_segs;
723         struct page *pg;
724         unsigned len;
725         unsigned offset;
726         unsigned move_pages:1;
727 };
728
729 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
730                            struct iov_iter *iter)
731 {
732         memset(cs, 0, sizeof(*cs));
733         cs->write = write;
734         cs->iter = iter;
735 }
736
737 /* Unmap and put previous page of userspace buffer */
738 static void fuse_copy_finish(struct fuse_copy_state *cs)
739 {
740         if (cs->currbuf) {
741                 struct pipe_buffer *buf = cs->currbuf;
742
743                 if (cs->write)
744                         buf->len = PAGE_SIZE - cs->len;
745                 cs->currbuf = NULL;
746         } else if (cs->pg) {
747                 if (cs->write) {
748                         flush_dcache_page(cs->pg);
749                         set_page_dirty_lock(cs->pg);
750                 }
751                 put_page(cs->pg);
752         }
753         cs->pg = NULL;
754 }
755
756 /*
757  * Get another pagefull of userspace buffer, and map it to kernel
758  * address space, and lock request
759  */
760 static int fuse_copy_fill(struct fuse_copy_state *cs)
761 {
762         struct page *page;
763         int err;
764
765         err = unlock_request(cs->req);
766         if (err)
767                 return err;
768
769         fuse_copy_finish(cs);
770         if (cs->pipebufs) {
771                 struct pipe_buffer *buf = cs->pipebufs;
772
773                 if (!cs->write) {
774                         err = buf->ops->confirm(cs->pipe, buf);
775                         if (err)
776                                 return err;
777
778                         BUG_ON(!cs->nr_segs);
779                         cs->currbuf = buf;
780                         cs->pg = buf->page;
781                         cs->offset = buf->offset;
782                         cs->len = buf->len;
783                         cs->pipebufs++;
784                         cs->nr_segs--;
785                 } else {
786                         if (cs->nr_segs == cs->pipe->buffers)
787                                 return -EIO;
788
789                         page = alloc_page(GFP_HIGHUSER);
790                         if (!page)
791                                 return -ENOMEM;
792
793                         buf->page = page;
794                         buf->offset = 0;
795                         buf->len = 0;
796
797                         cs->currbuf = buf;
798                         cs->pg = page;
799                         cs->offset = 0;
800                         cs->len = PAGE_SIZE;
801                         cs->pipebufs++;
802                         cs->nr_segs++;
803                 }
804         } else {
805                 size_t off;
806                 err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
807                 if (err < 0)
808                         return err;
809                 BUG_ON(!err);
810                 cs->len = err;
811                 cs->offset = off;
812                 cs->pg = page;
813                 cs->offset = off;
814                 iov_iter_advance(cs->iter, err);
815         }
816
817         return lock_request(cs->req);
818 }
819
820 /* Do as much copy to/from userspace buffer as we can */
821 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
822 {
823         unsigned ncpy = min(*size, cs->len);
824         if (val) {
825                 void *pgaddr = kmap_atomic(cs->pg);
826                 void *buf = pgaddr + cs->offset;
827
828                 if (cs->write)
829                         memcpy(buf, *val, ncpy);
830                 else
831                         memcpy(*val, buf, ncpy);
832
833                 kunmap_atomic(pgaddr);
834                 *val += ncpy;
835         }
836         *size -= ncpy;
837         cs->len -= ncpy;
838         cs->offset += ncpy;
839         return ncpy;
840 }
841
842 static int fuse_check_page(struct page *page)
843 {
844         if (page_mapcount(page) ||
845             page->mapping != NULL ||
846             page_count(page) != 1 ||
847             (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
848              ~(1 << PG_locked |
849                1 << PG_referenced |
850                1 << PG_uptodate |
851                1 << PG_lru |
852                1 << PG_active |
853                1 << PG_reclaim))) {
854                 printk(KERN_WARNING "fuse: trying to steal weird page\n");
855                 printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
856                 return 1;
857         }
858         return 0;
859 }
860
861 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
862 {
863         int err;
864         struct page *oldpage = *pagep;
865         struct page *newpage;
866         struct pipe_buffer *buf = cs->pipebufs;
867
868         err = unlock_request(cs->req);
869         if (err)
870                 return err;
871
872         fuse_copy_finish(cs);
873
874         err = buf->ops->confirm(cs->pipe, buf);
875         if (err)
876                 return err;
877
878         BUG_ON(!cs->nr_segs);
879         cs->currbuf = buf;
880         cs->len = buf->len;
881         cs->pipebufs++;
882         cs->nr_segs--;
883
884         if (cs->len != PAGE_SIZE)
885                 goto out_fallback;
886
887         if (buf->ops->steal(cs->pipe, buf) != 0)
888                 goto out_fallback;
889
890         newpage = buf->page;
891
892         if (!PageUptodate(newpage))
893                 SetPageUptodate(newpage);
894
895         ClearPageMappedToDisk(newpage);
896
897         if (fuse_check_page(newpage) != 0)
898                 goto out_fallback_unlock;
899
900         /*
901          * This is a new and locked page, it shouldn't be mapped or
902          * have any special flags on it
903          */
904         if (WARN_ON(page_mapped(oldpage)))
905                 goto out_fallback_unlock;
906         if (WARN_ON(page_has_private(oldpage)))
907                 goto out_fallback_unlock;
908         if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
909                 goto out_fallback_unlock;
910         if (WARN_ON(PageMlocked(oldpage)))
911                 goto out_fallback_unlock;
912
913         err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
914         if (err) {
915                 unlock_page(newpage);
916                 return err;
917         }
918
919         page_cache_get(newpage);
920
921         if (!(buf->flags & PIPE_BUF_FLAG_LRU))
922                 lru_cache_add_file(newpage);
923
924         err = 0;
925         spin_lock(&cs->req->waitq.lock);
926         if (test_bit(FR_ABORTED, &cs->req->flags))
927                 err = -ENOENT;
928         else
929                 *pagep = newpage;
930         spin_unlock(&cs->req->waitq.lock);
931
932         if (err) {
933                 unlock_page(newpage);
934                 page_cache_release(newpage);
935                 return err;
936         }
937
938         unlock_page(oldpage);
939         page_cache_release(oldpage);
940         cs->len = 0;
941
942         return 0;
943
944 out_fallback_unlock:
945         unlock_page(newpage);
946 out_fallback:
947         cs->pg = buf->page;
948         cs->offset = buf->offset;
949
950         err = lock_request(cs->req);
951         if (err)
952                 return err;
953
954         return 1;
955 }
956
957 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
958                          unsigned offset, unsigned count)
959 {
960         struct pipe_buffer *buf;
961         int err;
962
963         if (cs->nr_segs == cs->pipe->buffers)
964                 return -EIO;
965
966         err = unlock_request(cs->req);
967         if (err)
968                 return err;
969
970         fuse_copy_finish(cs);
971
972         buf = cs->pipebufs;
973         page_cache_get(page);
974         buf->page = page;
975         buf->offset = offset;
976         buf->len = count;
977
978         cs->pipebufs++;
979         cs->nr_segs++;
980         cs->len = 0;
981
982         return 0;
983 }
984
985 /*
986  * Copy a page in the request to/from the userspace buffer.  Must be
987  * done atomically
988  */
989 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
990                           unsigned offset, unsigned count, int zeroing)
991 {
992         int err;
993         struct page *page = *pagep;
994
995         if (page && zeroing && count < PAGE_SIZE)
996                 clear_highpage(page);
997
998         while (count) {
999                 if (cs->write && cs->pipebufs && page) {
1000                         return fuse_ref_page(cs, page, offset, count);
1001                 } else if (!cs->len) {
1002                         if (cs->move_pages && page &&
1003                             offset == 0 && count == PAGE_SIZE) {
1004                                 err = fuse_try_move_page(cs, pagep);
1005                                 if (err <= 0)
1006                                         return err;
1007                         } else {
1008                                 err = fuse_copy_fill(cs);
1009                                 if (err)
1010                                         return err;
1011                         }
1012                 }
1013                 if (page) {
1014                         void *mapaddr = kmap_atomic(page);
1015                         void *buf = mapaddr + offset;
1016                         offset += fuse_copy_do(cs, &buf, &count);
1017                         kunmap_atomic(mapaddr);
1018                 } else
1019                         offset += fuse_copy_do(cs, NULL, &count);
1020         }
1021         if (page && !cs->write)
1022                 flush_dcache_page(page);
1023         return 0;
1024 }
1025
1026 /* Copy pages in the request to/from userspace buffer */
1027 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
1028                            int zeroing)
1029 {
1030         unsigned i;
1031         struct fuse_req *req = cs->req;
1032
1033         for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
1034                 int err;
1035                 unsigned offset = req->page_descs[i].offset;
1036                 unsigned count = min(nbytes, req->page_descs[i].length);
1037
1038                 err = fuse_copy_page(cs, &req->pages[i], offset, count,
1039                                      zeroing);
1040                 if (err)
1041                         return err;
1042
1043                 nbytes -= count;
1044         }
1045         return 0;
1046 }
1047
1048 /* Copy a single argument in the request to/from userspace buffer */
1049 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1050 {
1051         while (size) {
1052                 if (!cs->len) {
1053                         int err = fuse_copy_fill(cs);
1054                         if (err)
1055                                 return err;
1056                 }
1057                 fuse_copy_do(cs, &val, &size);
1058         }
1059         return 0;
1060 }
1061
1062 /* Copy request arguments to/from userspace buffer */
1063 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1064                           unsigned argpages, struct fuse_arg *args,
1065                           int zeroing)
1066 {
1067         int err = 0;
1068         unsigned i;
1069
1070         for (i = 0; !err && i < numargs; i++)  {
1071                 struct fuse_arg *arg = &args[i];
1072                 if (i == numargs - 1 && argpages)
1073                         err = fuse_copy_pages(cs, arg->size, zeroing);
1074                 else
1075                         err = fuse_copy_one(cs, arg->value, arg->size);
1076         }
1077         return err;
1078 }
1079
1080 static int forget_pending(struct fuse_iqueue *fiq)
1081 {
1082         return fiq->forget_list_head.next != NULL;
1083 }
1084
1085 static int request_pending(struct fuse_iqueue *fiq)
1086 {
1087         return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1088                 forget_pending(fiq);
1089 }
1090
1091 /*
1092  * Transfer an interrupt request to userspace
1093  *
1094  * Unlike other requests this is assembled on demand, without a need
1095  * to allocate a separate fuse_req structure.
1096  *
1097  * Called with fiq->waitq.lock held, releases it
1098  */
1099 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1100                                struct fuse_copy_state *cs,
1101                                size_t nbytes, struct fuse_req *req)
1102 __releases(fiq->waitq.lock)
1103 {
1104         struct fuse_in_header ih;
1105         struct fuse_interrupt_in arg;
1106         unsigned reqsize = sizeof(ih) + sizeof(arg);
1107         int err;
1108
1109         list_del_init(&req->intr_entry);
1110         req->intr_unique = fuse_get_unique(fiq);
1111         memset(&ih, 0, sizeof(ih));
1112         memset(&arg, 0, sizeof(arg));
1113         ih.len = reqsize;
1114         ih.opcode = FUSE_INTERRUPT;
1115         ih.unique = req->intr_unique;
1116         arg.unique = req->in.h.unique;
1117
1118         spin_unlock(&fiq->waitq.lock);
1119         if (nbytes < reqsize)
1120                 return -EINVAL;
1121
1122         err = fuse_copy_one(cs, &ih, sizeof(ih));
1123         if (!err)
1124                 err = fuse_copy_one(cs, &arg, sizeof(arg));
1125         fuse_copy_finish(cs);
1126
1127         return err ? err : reqsize;
1128 }
1129
1130 static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
1131                                                unsigned max,
1132                                                unsigned *countp)
1133 {
1134         struct fuse_forget_link *head = fiq->forget_list_head.next;
1135         struct fuse_forget_link **newhead = &head;
1136         unsigned count;
1137
1138         for (count = 0; *newhead != NULL && count < max; count++)
1139                 newhead = &(*newhead)->next;
1140
1141         fiq->forget_list_head.next = *newhead;
1142         *newhead = NULL;
1143         if (fiq->forget_list_head.next == NULL)
1144                 fiq->forget_list_tail = &fiq->forget_list_head;
1145
1146         if (countp != NULL)
1147                 *countp = count;
1148
1149         return head;
1150 }
1151
1152 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1153                                    struct fuse_copy_state *cs,
1154                                    size_t nbytes)
1155 __releases(fiq->waitq.lock)
1156 {
1157         int err;
1158         struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
1159         struct fuse_forget_in arg = {
1160                 .nlookup = forget->forget_one.nlookup,
1161         };
1162         struct fuse_in_header ih = {
1163                 .opcode = FUSE_FORGET,
1164                 .nodeid = forget->forget_one.nodeid,
1165                 .unique = fuse_get_unique(fiq),
1166                 .len = sizeof(ih) + sizeof(arg),
1167         };
1168
1169         spin_unlock(&fiq->waitq.lock);
1170         kfree(forget);
1171         if (nbytes < ih.len)
1172                 return -EINVAL;
1173
1174         err = fuse_copy_one(cs, &ih, sizeof(ih));
1175         if (!err)
1176                 err = fuse_copy_one(cs, &arg, sizeof(arg));
1177         fuse_copy_finish(cs);
1178
1179         if (err)
1180                 return err;
1181
1182         return ih.len;
1183 }
1184
1185 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1186                                    struct fuse_copy_state *cs, size_t nbytes)
1187 __releases(fiq->waitq.lock)
1188 {
1189         int err;
1190         unsigned max_forgets;
1191         unsigned count;
1192         struct fuse_forget_link *head;
1193         struct fuse_batch_forget_in arg = { .count = 0 };
1194         struct fuse_in_header ih = {
1195                 .opcode = FUSE_BATCH_FORGET,
1196                 .unique = fuse_get_unique(fiq),
1197                 .len = sizeof(ih) + sizeof(arg),
1198         };
1199
1200         if (nbytes < ih.len) {
1201                 spin_unlock(&fiq->waitq.lock);
1202                 return -EINVAL;
1203         }
1204
1205         max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1206         head = dequeue_forget(fiq, max_forgets, &count);
1207         spin_unlock(&fiq->waitq.lock);
1208
1209         arg.count = count;
1210         ih.len += count * sizeof(struct fuse_forget_one);
1211         err = fuse_copy_one(cs, &ih, sizeof(ih));
1212         if (!err)
1213                 err = fuse_copy_one(cs, &arg, sizeof(arg));
1214
1215         while (head) {
1216                 struct fuse_forget_link *forget = head;
1217
1218                 if (!err) {
1219                         err = fuse_copy_one(cs, &forget->forget_one,
1220                                             sizeof(forget->forget_one));
1221                 }
1222                 head = forget->next;
1223                 kfree(forget);
1224         }
1225
1226         fuse_copy_finish(cs);
1227
1228         if (err)
1229                 return err;
1230
1231         return ih.len;
1232 }
1233
1234 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1235                             struct fuse_copy_state *cs,
1236                             size_t nbytes)
1237 __releases(fiq->waitq.lock)
1238 {
1239         if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1240                 return fuse_read_single_forget(fiq, cs, nbytes);
1241         else
1242                 return fuse_read_batch_forget(fiq, cs, nbytes);
1243 }
1244
1245 /*
1246  * Read a single request into the userspace filesystem's buffer.  This
1247  * function waits until a request is available, then removes it from
1248  * the pending list and copies request data to userspace buffer.  If
1249  * no reply is needed (FORGET) or request has been aborted or there
1250  * was an error during the copying then it's finished by calling
1251  * request_end().  Otherwise add it to the processing list, and set
1252  * the 'sent' flag.
1253  */
1254 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1255                                 struct fuse_copy_state *cs, size_t nbytes)
1256 {
1257         ssize_t err;
1258         struct fuse_conn *fc = fud->fc;
1259         struct fuse_iqueue *fiq = &fc->iq;
1260         struct fuse_pqueue *fpq = &fud->pq;
1261         struct fuse_req *req;
1262         struct fuse_in *in;
1263         unsigned reqsize;
1264
1265  restart:
1266         spin_lock(&fiq->waitq.lock);
1267         err = -EAGAIN;
1268         if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
1269             !request_pending(fiq))
1270                 goto err_unlock;
1271
1272         err = wait_event_interruptible_exclusive_locked(fiq->waitq,
1273                                 !fiq->connected || request_pending(fiq));
1274         if (err)
1275                 goto err_unlock;
1276
1277         err = -ENODEV;
1278         if (!fiq->connected)
1279                 goto err_unlock;
1280
1281         if (!list_empty(&fiq->interrupts)) {
1282                 req = list_entry(fiq->interrupts.next, struct fuse_req,
1283                                  intr_entry);
1284                 return fuse_read_interrupt(fiq, cs, nbytes, req);
1285         }
1286
1287         if (forget_pending(fiq)) {
1288                 if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1289                         return fuse_read_forget(fc, fiq, cs, nbytes);
1290
1291                 if (fiq->forget_batch <= -8)
1292                         fiq->forget_batch = 16;
1293         }
1294
1295         req = list_entry(fiq->pending.next, struct fuse_req, list);
1296         clear_bit(FR_PENDING, &req->flags);
1297         list_del_init(&req->list);
1298         spin_unlock(&fiq->waitq.lock);
1299
1300         in = &req->in;
1301         reqsize = in->h.len;
1302         /* If request is too large, reply with an error and restart the read */
1303         if (nbytes < reqsize) {
1304                 req->out.h.error = -EIO;
1305                 /* SETXATTR is special, since it may contain too large data */
1306                 if (in->h.opcode == FUSE_SETXATTR)
1307                         req->out.h.error = -E2BIG;
1308                 request_end(fc, req);
1309                 goto restart;
1310         }
1311         spin_lock(&fpq->lock);
1312         list_add(&req->list, &fpq->io);
1313         spin_unlock(&fpq->lock);
1314         cs->req = req;
1315         err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1316         if (!err)
1317                 err = fuse_copy_args(cs, in->numargs, in->argpages,
1318                                      (struct fuse_arg *) in->args, 0);
1319         fuse_copy_finish(cs);
1320         spin_lock(&fpq->lock);
1321         clear_bit(FR_LOCKED, &req->flags);
1322         if (!fpq->connected) {
1323                 err = -ENODEV;
1324                 goto out_end;
1325         }
1326         if (err) {
1327                 req->out.h.error = -EIO;
1328                 goto out_end;
1329         }
1330         if (!test_bit(FR_ISREPLY, &req->flags)) {
1331                 err = reqsize;
1332                 goto out_end;
1333         }
1334         list_move_tail(&req->list, &fpq->processing);
1335         spin_unlock(&fpq->lock);
1336         set_bit(FR_SENT, &req->flags);
1337         /* matches barrier in request_wait_answer() */
1338         smp_mb__after_atomic();
1339         if (test_bit(FR_INTERRUPTED, &req->flags))
1340                 queue_interrupt(fiq, req);
1341
1342         return reqsize;
1343
1344 out_end:
1345         if (!test_bit(FR_PRIVATE, &req->flags))
1346                 list_del_init(&req->list);
1347         spin_unlock(&fpq->lock);
1348         request_end(fc, req);
1349         return err;
1350
1351  err_unlock:
1352         spin_unlock(&fiq->waitq.lock);
1353         return err;
1354 }
1355
1356 static int fuse_dev_open(struct inode *inode, struct file *file)
1357 {
1358         /*
1359          * The fuse device's file's private_data is used to hold
1360          * the fuse_conn(ection) when it is mounted, and is used to
1361          * keep track of whether the file has been mounted already.
1362          */
1363         file->private_data = NULL;
1364         return 0;
1365 }
1366
1367 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1368 {
1369         struct fuse_copy_state cs;
1370         struct file *file = iocb->ki_filp;
1371         struct fuse_dev *fud = fuse_get_dev(file);
1372
1373         if (!fud)
1374                 return -EPERM;
1375
1376         if (!iter_is_iovec(to))
1377                 return -EINVAL;
1378
1379         fuse_copy_init(&cs, 1, to);
1380
1381         return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1382 }
1383
1384 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1385                                     struct pipe_inode_info *pipe,
1386                                     size_t len, unsigned int flags)
1387 {
1388         int ret;
1389         int page_nr = 0;
1390         int do_wakeup = 0;
1391         struct pipe_buffer *bufs;
1392         struct fuse_copy_state cs;
1393         struct fuse_dev *fud = fuse_get_dev(in);
1394
1395         if (!fud)
1396                 return -EPERM;
1397
1398         bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1399         if (!bufs)
1400                 return -ENOMEM;
1401
1402         fuse_copy_init(&cs, 1, NULL);
1403         cs.pipebufs = bufs;
1404         cs.pipe = pipe;
1405         ret = fuse_dev_do_read(fud, in, &cs, len);
1406         if (ret < 0)
1407                 goto out;
1408
1409         ret = 0;
1410         pipe_lock(pipe);
1411
1412         if (!pipe->readers) {
1413                 send_sig(SIGPIPE, current, 0);
1414                 if (!ret)
1415                         ret = -EPIPE;
1416                 goto out_unlock;
1417         }
1418
1419         if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1420                 ret = -EIO;
1421                 goto out_unlock;
1422         }
1423
1424         while (page_nr < cs.nr_segs) {
1425                 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1426                 struct pipe_buffer *buf = pipe->bufs + newbuf;
1427
1428                 buf->page = bufs[page_nr].page;
1429                 buf->offset = bufs[page_nr].offset;
1430                 buf->len = bufs[page_nr].len;
1431                 /*
1432                  * Need to be careful about this.  Having buf->ops in module
1433                  * code can Oops if the buffer persists after module unload.
1434                  */
1435                 buf->ops = &nosteal_pipe_buf_ops;
1436
1437                 pipe->nrbufs++;
1438                 page_nr++;
1439                 ret += buf->len;
1440
1441                 if (pipe->files)
1442                         do_wakeup = 1;
1443         }
1444
1445 out_unlock:
1446         pipe_unlock(pipe);
1447
1448         if (do_wakeup) {
1449                 smp_mb();
1450                 if (waitqueue_active(&pipe->wait))
1451                         wake_up_interruptible(&pipe->wait);
1452                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1453         }
1454
1455 out:
1456         for (; page_nr < cs.nr_segs; page_nr++)
1457                 page_cache_release(bufs[page_nr].page);
1458
1459         kfree(bufs);
1460         return ret;
1461 }
1462
1463 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1464                             struct fuse_copy_state *cs)
1465 {
1466         struct fuse_notify_poll_wakeup_out outarg;
1467         int err = -EINVAL;
1468
1469         if (size != sizeof(outarg))
1470                 goto err;
1471
1472         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1473         if (err)
1474                 goto err;
1475
1476         fuse_copy_finish(cs);
1477         return fuse_notify_poll_wakeup(fc, &outarg);
1478
1479 err:
1480         fuse_copy_finish(cs);
1481         return err;
1482 }
1483
1484 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1485                                    struct fuse_copy_state *cs)
1486 {
1487         struct fuse_notify_inval_inode_out outarg;
1488         int err = -EINVAL;
1489
1490         if (size != sizeof(outarg))
1491                 goto err;
1492
1493         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1494         if (err)
1495                 goto err;
1496         fuse_copy_finish(cs);
1497
1498         down_read(&fc->killsb);
1499         err = -ENOENT;
1500         if (fc->sb) {
1501                 err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1502                                                outarg.off, outarg.len);
1503         }
1504         up_read(&fc->killsb);
1505         return err;
1506
1507 err:
1508         fuse_copy_finish(cs);
1509         return err;
1510 }
1511
1512 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1513                                    struct fuse_copy_state *cs)
1514 {
1515         struct fuse_notify_inval_entry_out outarg;
1516         int err = -ENOMEM;
1517         char *buf;
1518         struct qstr name;
1519
1520         buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1521         if (!buf)
1522                 goto err;
1523
1524         err = -EINVAL;
1525         if (size < sizeof(outarg))
1526                 goto err;
1527
1528         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1529         if (err)
1530                 goto err;
1531
1532         err = -ENAMETOOLONG;
1533         if (outarg.namelen > FUSE_NAME_MAX)
1534                 goto err;
1535
1536         err = -EINVAL;
1537         if (size != sizeof(outarg) + outarg.namelen + 1)
1538                 goto err;
1539
1540         name.name = buf;
1541         name.len = outarg.namelen;
1542         err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1543         if (err)
1544                 goto err;
1545         fuse_copy_finish(cs);
1546         buf[outarg.namelen] = 0;
1547         name.hash = full_name_hash(name.name, name.len);
1548
1549         down_read(&fc->killsb);
1550         err = -ENOENT;
1551         if (fc->sb)
1552                 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1553         up_read(&fc->killsb);
1554         kfree(buf);
1555         return err;
1556
1557 err:
1558         kfree(buf);
1559         fuse_copy_finish(cs);
1560         return err;
1561 }
1562
1563 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1564                               struct fuse_copy_state *cs)
1565 {
1566         struct fuse_notify_delete_out outarg;
1567         int err = -ENOMEM;
1568         char *buf;
1569         struct qstr name;
1570
1571         buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1572         if (!buf)
1573                 goto err;
1574
1575         err = -EINVAL;
1576         if (size < sizeof(outarg))
1577                 goto err;
1578
1579         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1580         if (err)
1581                 goto err;
1582
1583         err = -ENAMETOOLONG;
1584         if (outarg.namelen > FUSE_NAME_MAX)
1585                 goto err;
1586
1587         err = -EINVAL;
1588         if (size != sizeof(outarg) + outarg.namelen + 1)
1589                 goto err;
1590
1591         name.name = buf;
1592         name.len = outarg.namelen;
1593         err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1594         if (err)
1595                 goto err;
1596         fuse_copy_finish(cs);
1597         buf[outarg.namelen] = 0;
1598         name.hash = full_name_hash(name.name, name.len);
1599
1600         down_read(&fc->killsb);
1601         err = -ENOENT;
1602         if (fc->sb)
1603                 err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1604                                                outarg.child, &name);
1605         up_read(&fc->killsb);
1606         kfree(buf);
1607         return err;
1608
1609 err:
1610         kfree(buf);
1611         fuse_copy_finish(cs);
1612         return err;
1613 }
1614
1615 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1616                              struct fuse_copy_state *cs)
1617 {
1618         struct fuse_notify_store_out outarg;
1619         struct inode *inode;
1620         struct address_space *mapping;
1621         u64 nodeid;
1622         int err;
1623         pgoff_t index;
1624         unsigned int offset;
1625         unsigned int num;
1626         loff_t file_size;
1627         loff_t end;
1628
1629         err = -EINVAL;
1630         if (size < sizeof(outarg))
1631                 goto out_finish;
1632
1633         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1634         if (err)
1635                 goto out_finish;
1636
1637         err = -EINVAL;
1638         if (size - sizeof(outarg) != outarg.size)
1639                 goto out_finish;
1640
1641         nodeid = outarg.nodeid;
1642
1643         down_read(&fc->killsb);
1644
1645         err = -ENOENT;
1646         if (!fc->sb)
1647                 goto out_up_killsb;
1648
1649         inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1650         if (!inode)
1651                 goto out_up_killsb;
1652
1653         mapping = inode->i_mapping;
1654         index = outarg.offset >> PAGE_CACHE_SHIFT;
1655         offset = outarg.offset & ~PAGE_CACHE_MASK;
1656         file_size = i_size_read(inode);
1657         end = outarg.offset + outarg.size;
1658         if (end > file_size) {
1659                 file_size = end;
1660                 fuse_write_update_size(inode, file_size);
1661         }
1662
1663         num = outarg.size;
1664         while (num) {
1665                 struct page *page;
1666                 unsigned int this_num;
1667
1668                 err = -ENOMEM;
1669                 page = find_or_create_page(mapping, index,
1670                                            mapping_gfp_mask(mapping));
1671                 if (!page)
1672                         goto out_iput;
1673
1674                 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1675                 err = fuse_copy_page(cs, &page, offset, this_num, 0);
1676                 if (!err && offset == 0 &&
1677                     (this_num == PAGE_CACHE_SIZE || file_size == end))
1678                         SetPageUptodate(page);
1679                 unlock_page(page);
1680                 page_cache_release(page);
1681
1682                 if (err)
1683                         goto out_iput;
1684
1685                 num -= this_num;
1686                 offset = 0;
1687                 index++;
1688         }
1689
1690         err = 0;
1691
1692 out_iput:
1693         iput(inode);
1694 out_up_killsb:
1695         up_read(&fc->killsb);
1696 out_finish:
1697         fuse_copy_finish(cs);
1698         return err;
1699 }
1700
1701 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1702 {
1703         release_pages(req->pages, req->num_pages, false);
1704 }
1705
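/*
 * Collect up to FUSE_MAX_PAGES_PER_REQ pages of the inode's cached data
 * starting at outarg->offset and send them back to the daemon in a
 * FUSE_NOTIFY_REPLY request tagged with notify_unique.  A missing page
 * ends the scan, so less data than requested may be returned.
 */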
1706 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1707                          struct fuse_notify_retrieve_out *outarg)
1708 {
1709         int err;
1710         struct address_space *mapping = inode->i_mapping;
1711         struct fuse_req *req;
1712         pgoff_t index;
1713         loff_t file_size;
1714         unsigned int num;
1715         unsigned int offset;
1716         size_t total_len = 0;
1717         int num_pages;
1718
1719         offset = outarg->offset & ~PAGE_CACHE_MASK;
1720         file_size = i_size_read(inode);
1721
1722         num = outarg->size;
1723         if (outarg->offset > file_size)
1724                 num = 0;
1725         else if (outarg->offset + num > file_size)
1726                 num = file_size - outarg->offset;
1727
1728         num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1729         num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1730
1731         req = fuse_get_req(fc, num_pages);
1732         if (IS_ERR(req))
1733                 return PTR_ERR(req);
1734
1735         req->in.h.opcode = FUSE_NOTIFY_REPLY;
1736         req->in.h.nodeid = outarg->nodeid;
1737         req->in.numargs = 2;
1738         req->in.argpages = 1;
1739         req->page_descs[0].offset = offset;
1740         req->end = fuse_retrieve_end;
1741
1742         index = outarg->offset >> PAGE_CACHE_SHIFT;
1743
1744         while (num && req->num_pages < num_pages) {
1745                 struct page *page;
1746                 unsigned int this_num;
1747
1748                 page = find_get_page(mapping, index);
1749                 if (!page)
1750                         break;
1751
1752                 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1753                 req->pages[req->num_pages] = page;
1754                 req->page_descs[req->num_pages].length = this_num;
1755                 req->num_pages++;
1756
1757                 offset = 0;
1758                 num -= this_num;
1759                 total_len += this_num;
1760                 index++;
1761         }
1762         req->misc.retrieve_in.offset = outarg->offset;
1763         req->misc.retrieve_in.size = total_len;
1764         req->in.args[0].size = sizeof(req->misc.retrieve_in);
1765         req->in.args[0].value = &req->misc.retrieve_in;
1766         req->in.args[1].size = total_len;
1767
1768         err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1769         if (err)
1770                 fuse_retrieve_end(fc, req);
1771
1772         return err;
1773 }
1774
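/*
 * FUSE_NOTIFY_RETRIEVE: the daemon asks for cached data of a given nodeid
 * to be sent back; the actual work is done by fuse_retrieve() above.
 */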
1775 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1776                                 struct fuse_copy_state *cs)
1777 {
1778         struct fuse_notify_retrieve_out outarg;
1779         struct inode *inode;
1780         int err;
1781
1782         err = -EINVAL;
1783         if (size != sizeof(outarg))
1784                 goto copy_finish;
1785
1786         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1787         if (err)
1788                 goto copy_finish;
1789
1790         fuse_copy_finish(cs);
1791
1792         down_read(&fc->killsb);
1793         err = -ENOENT;
1794         if (fc->sb) {
1795                 u64 nodeid = outarg.nodeid;
1796
1797                 inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1798                 if (inode) {
1799                         err = fuse_retrieve(fc, inode, &outarg);
1800                         iput(inode);
1801                 }
1802         }
1803         up_read(&fc->killsb);
1804
1805         return err;
1806
1807 copy_finish:
1808         fuse_copy_finish(cs);
1809         return err;
1810 }
1811
1812 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1813                        unsigned int size, struct fuse_copy_state *cs)
1814 {
1815         /* Don't try to move pages (yet) */
1816         cs->move_pages = 0;
1817
1818         switch (code) {
1819         case FUSE_NOTIFY_POLL:
1820                 return fuse_notify_poll(fc, size, cs);
1821
1822         case FUSE_NOTIFY_INVAL_INODE:
1823                 return fuse_notify_inval_inode(fc, size, cs);
1824
1825         case FUSE_NOTIFY_INVAL_ENTRY:
1826                 return fuse_notify_inval_entry(fc, size, cs);
1827
1828         case FUSE_NOTIFY_STORE:
1829                 return fuse_notify_store(fc, size, cs);
1830
1831         case FUSE_NOTIFY_RETRIEVE:
1832                 return fuse_notify_retrieve(fc, size, cs);
1833
1834         case FUSE_NOTIFY_DELETE:
1835                 return fuse_notify_delete(fc, size, cs);
1836
1837         default:
1838                 fuse_copy_finish(cs);
1839                 return -EINVAL;
1840         }
1841 }
1842
1843 /* Look up a request on the processing list by its unique ID */

1844 static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1845 {
1846         struct fuse_req *req;
1847
1848         list_for_each_entry(req, &fpq->processing, list) {
1849                 if (req->in.h.unique == unique || req->intr_unique == unique)
1850                         return req;
1851         }
1852         return NULL;
1853 }
1854
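/*
 * Copy the reply payload into the request's out arguments.  An error
 * reply must consist of the header alone.  Otherwise the write may be
 * shorter than the declared maximum only if the last argument is
 * variable sized (argvar), in which case that argument is shrunk to
 * fit; it may never be longer.
 */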
1855 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1856                          unsigned nbytes)
1857 {
1858         unsigned reqsize = sizeof(struct fuse_out_header);
1859
1860         if (out->h.error)
1861                 return nbytes != reqsize ? -EINVAL : 0;
1862
1863         reqsize += len_args(out->numargs, out->args);
1864
1865         if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1866                 return -EINVAL;
1867         else if (reqsize > nbytes) {
1868                 struct fuse_arg *lastarg = &out->args[out->numargs-1];
1869                 unsigned diffsize = reqsize - nbytes;
1870                 if (diffsize > lastarg->size)
1871                         return -EINVAL;
1872                 lastarg->size -= diffsize;
1873         }
1874         return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1875                               out->page_zeroing);
1876 }
1877
1878 /*
1879  * Write a single reply to a request.  First the header is copied from
1880  * the write buffer, then the request is looked up on the processing
1881  * list by the unique ID found in the header.  If found, it is removed
1882  * from the list, the rest of the buffer is copied into the request,
1883  * and the request is finished by calling request_end().
1884  */
1885 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1886                                  struct fuse_copy_state *cs, size_t nbytes)
1887 {
1888         int err;
1889         struct fuse_conn *fc = fud->fc;
1890         struct fuse_pqueue *fpq = &fud->pq;
1891         struct fuse_req *req;
1892         struct fuse_out_header oh;
1893
1894         if (nbytes < sizeof(struct fuse_out_header))
1895                 return -EINVAL;
1896
1897         err = fuse_copy_one(cs, &oh, sizeof(oh));
1898         if (err)
1899                 goto err_finish;
1900
1901         err = -EINVAL;
1902         if (oh.len != nbytes)
1903                 goto err_finish;
1904
1905         /*
1906          * A zero oh.unique indicates an unsolicited notification message;
1907          * the error field then carries the notification code.
1908          */
1909         if (!oh.unique) {
1910                 err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1911                 return err ? err : nbytes;
1912         }
1913
1914         err = -EINVAL;
1915         if (oh.error <= -1000 || oh.error > 0)
1916                 goto err_finish;
1917
1918         spin_lock(&fpq->lock);
1919         err = -ENOENT;
1920         if (!fpq->connected)
1921                 goto err_unlock_pq;
1922
1923         req = request_find(fpq, oh.unique);
1924         if (!req)
1925                 goto err_unlock_pq;
1926
1927         /* Is it an interrupt reply? */
1928         if (req->intr_unique == oh.unique) {
1929                 spin_unlock(&fpq->lock);
1930
1931                 err = -EINVAL;
1932                 if (nbytes != sizeof(struct fuse_out_header))
1933                         goto err_finish;
1934
1935                 if (oh.error == -ENOSYS)
1936                         fc->no_interrupt = 1;
1937                 else if (oh.error == -EAGAIN)
1938                         queue_interrupt(&fc->iq, req);
1939
1940                 fuse_copy_finish(cs);
1941                 return nbytes;
1942         }
1943
1944         clear_bit(FR_SENT, &req->flags);
1945         list_move(&req->list, &fpq->io);
1946         req->out.h = oh;
1947         set_bit(FR_LOCKED, &req->flags);
1948         spin_unlock(&fpq->lock);
1949         cs->req = req;
1950         if (!req->out.page_replace)
1951                 cs->move_pages = 0;
1952
1953         err = copy_out_args(cs, &req->out, nbytes);
1954         if (req->in.h.opcode == FUSE_CANONICAL_PATH) {
1955                 char *path = (char *)req->out.args[0].value;
1956
1957                 path[req->out.args[0].size - 1] = 0;
1958                 req->out.h.error = kern_path(path, 0, req->canonical_path);
1959         }
1960         fuse_copy_finish(cs);
1961
1962         spin_lock(&fpq->lock);
1963         clear_bit(FR_LOCKED, &req->flags);
1964         if (!fpq->connected)
1965                 err = -ENOENT;
1966         else if (err)
1967                 req->out.h.error = -EIO;
1968         if (!test_bit(FR_PRIVATE, &req->flags))
1969                 list_del_init(&req->list);
1970         spin_unlock(&fpq->lock);
1971
1972         request_end(fc, req);
1973
1974         return err ? err : nbytes;
1975
1976  err_unlock_pq:
1977         spin_unlock(&fpq->lock);
1978  err_finish:
1979         fuse_copy_finish(cs);
1980         return err;
1981 }
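
/*
 * Example (userspace sketch, not part of this kernel file): answering a
 * request.  The daemon writes the fuse_out_header and the reply payload
 * in a single write(); oh.len must equal the number of bytes written and
 * oh.unique must match the unique value of the request being answered.
 * An error reply carries no payload.  Names such as devfd, send_reply()
 * and payload are placeholders.
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <linux/fuse.h>
 *
 *	static int send_reply(int devfd, uint64_t unique, int error,
 *			      const void *payload, size_t payload_len)
 *	{
 *		struct fuse_out_header oh;
 *		char buf[8192];
 *
 *		oh.unique = unique;		// must match the request
 *		oh.error = error;		// 0 or a negative errno
 *		oh.len = sizeof(oh) + (error ? 0 : payload_len);
 *		if (oh.len > sizeof(buf))
 *			return -1;
 *		memcpy(buf, &oh, sizeof(oh));
 *		if (!error)
 *			memcpy(buf + sizeof(oh), payload, payload_len);
 *		return write(devfd, buf, oh.len) == (ssize_t)oh.len ? 0 : -1;
 *	}
 */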
1982
1983 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1984 {
1985         struct fuse_copy_state cs;
1986         struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
1987
1988         if (!fud)
1989                 return -EPERM;
1990
1991         if (!iter_is_iovec(from))
1992                 return -EINVAL;
1993
1994         fuse_copy_init(&cs, 0, from);
1995
1996         return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
1997 }
1998
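/*
 * splice() based variant of the reply write: gather the pipe buffers
 * covering len bytes and feed them to fuse_dev_do_write().  With
 * SPLICE_F_MOVE the buffers' pages may be moved into the page cache
 * instead of copied, when the request allows page replacement.
 */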
1999 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2000                                      struct file *out, loff_t *ppos,
2001                                      size_t len, unsigned int flags)
2002 {
2003         unsigned nbuf;
2004         unsigned idx;
2005         struct pipe_buffer *bufs;
2006         struct fuse_copy_state cs;
2007         struct fuse_dev *fud;
2008         size_t rem;
2009         ssize_t ret;
2010
2011         fud = fuse_get_dev(out);
2012         if (!fud)
2013                 return -EPERM;
2014
2015         pipe_lock(pipe);
2016
2017         bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
2018         if (!bufs) {
2019                 pipe_unlock(pipe);
2020                 return -ENOMEM;
2021         }
2022
2023         nbuf = 0;
2024         rem = 0;
2025         for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
2026                 rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
2027
2028         ret = -EINVAL;
2029         if (rem < len) {
2030                 pipe_unlock(pipe);
2031                 goto out;
2032         }
2033
2034         rem = len;
2035         while (rem) {
2036                 struct pipe_buffer *ibuf;
2037                 struct pipe_buffer *obuf;
2038
2039                 BUG_ON(nbuf >= pipe->buffers);
2040                 BUG_ON(!pipe->nrbufs);
2041                 ibuf = &pipe->bufs[pipe->curbuf];
2042                 obuf = &bufs[nbuf];
2043
2044                 if (rem >= ibuf->len) {
2045                         *obuf = *ibuf;
2046                         ibuf->ops = NULL;
2047                         pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
2048                         pipe->nrbufs--;
2049                 } else {
2050                         ibuf->ops->get(pipe, ibuf);
2051                         *obuf = *ibuf;
2052                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2053                         obuf->len = rem;
2054                         ibuf->offset += obuf->len;
2055                         ibuf->len -= obuf->len;
2056                 }
2057                 nbuf++;
2058                 rem -= obuf->len;
2059         }
2060         pipe_unlock(pipe);
2061
2062         fuse_copy_init(&cs, 0, NULL);
2063         cs.pipebufs = bufs;
2064         cs.nr_segs = nbuf;
2065         cs.pipe = pipe;
2066
2067         if (flags & SPLICE_F_MOVE)
2068                 cs.move_pages = 1;
2069
2070         ret = fuse_dev_do_write(fud, &cs, len);
2071
2072         for (idx = 0; idx < nbuf; idx++) {
2073                 struct pipe_buffer *buf = &bufs[idx];
2074                 buf->ops->release(pipe, buf);
2075         }
2076 out:
2077         kfree(bufs);
2078         return ret;
2079 }
2080
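/*
 * Poll for the fuse device: it is always writable; POLLIN is set when
 * requests are pending on the input queue, and POLLERR once the
 * connection is no longer alive.
 */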
2081 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
2082 {
2083         unsigned mask = POLLOUT | POLLWRNORM;
2084         struct fuse_iqueue *fiq;
2085         struct fuse_dev *fud = fuse_get_dev(file);
2086
2087         if (!fud)
2088                 return POLLERR;
2089
2090         fiq = &fud->fc->iq;
2091         poll_wait(file, &fiq->waitq, wait);
2092
2093         spin_lock(&fiq->waitq.lock);
2094         if (!fiq->connected)
2095                 mask = POLLERR;
2096         else if (request_pending(fiq))
2097                 mask |= POLLIN | POLLRDNORM;
2098         spin_unlock(&fiq->waitq.lock);
2099
2100         return mask;
2101 }
2102
2103 /*
2104  * Abort all requests on the given list (pending or processing).
2105  *
2106  * Called without fc->lock held; requests are completed via request_end().
2107  */
2108 static void end_requests(struct fuse_conn *fc, struct list_head *head)
2109 {
2110         while (!list_empty(head)) {
2111                 struct fuse_req *req;
2112                 req = list_entry(head->next, struct fuse_req, list);
2113                 req->out.h.error = -ECONNABORTED;
2114                 clear_bit(FR_SENT, &req->flags);
2115                 list_del_init(&req->list);
2116                 request_end(fc, req);
2117         }
2118 }
2119
2120 static void end_polls(struct fuse_conn *fc)
2121 {
2122         struct rb_node *p;
2123
2124         p = rb_first(&fc->polled_files);
2125
2126         while (p) {
2127                 struct fuse_file *ff;
2128                 ff = rb_entry(p, struct fuse_file, polled_node);
2129                 wake_up_interruptible_all(&ff->poll_wait);
2130
2131                 p = rb_next(p);
2132         }
2133 }
2134
2135 /*
2136  * Abort all requests.
2137  *
2138  * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2139  * filesystem.
2140  *
2141  * The same effect is usually achievable through killing the filesystem daemon
2142  * and all users of the filesystem.  The exception is the combination of an
2143  * asynchronous request and the tricky deadlock (see
2144  * Documentation/filesystems/fuse.txt).
2145  *
2146  * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2147  * requests; these should be finished off immediately.  Locked requests
2148  * will be finished after unlock; see unlock_request(). 2: Finish off the
2149  * unlocked requests.  It is possible that some request will be finished
2150  * before we get to it; in that case it will already have been removed
2151  * from the list, so we never touch it.
2152  */
2153 void fuse_abort_conn(struct fuse_conn *fc)
2154 {
2155         struct fuse_iqueue *fiq = &fc->iq;
2156
2157         spin_lock(&fc->lock);
2158         if (fc->connected) {
2159                 struct fuse_dev *fud;
2160                 struct fuse_req *req, *next;
2161                 LIST_HEAD(to_end1);
2162                 LIST_HEAD(to_end2);
2163
2164                 fc->connected = 0;
2165                 fc->blocked = 0;
2166                 fuse_set_initialized(fc);
2167                 list_for_each_entry(fud, &fc->devices, entry) {
2168                         struct fuse_pqueue *fpq = &fud->pq;
2169
2170                         spin_lock(&fpq->lock);
2171                         fpq->connected = 0;
2172                         list_for_each_entry_safe(req, next, &fpq->io, list) {
2173                                 req->out.h.error = -ECONNABORTED;
2174                                 spin_lock(&req->waitq.lock);
2175                                 set_bit(FR_ABORTED, &req->flags);
2176                                 if (!test_bit(FR_LOCKED, &req->flags)) {
2177                                         set_bit(FR_PRIVATE, &req->flags);
2178                                         __fuse_get_request(req);
2179                                         list_move(&req->list, &to_end1);
2180                                 }
2181                                 spin_unlock(&req->waitq.lock);
2182                         }
2183                         list_splice_init(&fpq->processing, &to_end2);
2184                         spin_unlock(&fpq->lock);
2185                 }
2186                 fc->max_background = UINT_MAX;
2187                 flush_bg_queue(fc);
2188
2189                 spin_lock(&fiq->waitq.lock);
2190                 fiq->connected = 0;
2191                 list_splice_init(&fiq->pending, &to_end2);
2192                 list_for_each_entry(req, &to_end2, list)
2193                         clear_bit(FR_PENDING, &req->flags);
2194                 while (forget_pending(fiq))
2195                         kfree(dequeue_forget(fiq, 1, NULL));
2196                 wake_up_all_locked(&fiq->waitq);
2197                 spin_unlock(&fiq->waitq.lock);
2198                 kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2199                 end_polls(fc);
2200                 wake_up_all(&fc->blocked_waitq);
2201                 spin_unlock(&fc->lock);
2202
2203                 while (!list_empty(&to_end1)) {
2204                         req = list_first_entry(&to_end1, struct fuse_req, list);
2205                         list_del_init(&req->list);
2206                         request_end(fc, req);
2207                 }
2208                 end_requests(fc, &to_end2);
2209         } else {
2210                 spin_unlock(&fc->lock);
2211         }
2212 }
2213 EXPORT_SYMBOL_GPL(fuse_abort_conn);
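
/*
 * Example (userspace sketch, not part of this kernel file): an abort is
 * typically triggered from userspace by writing to the connection's
 * "abort" file in the fusectl filesystem (conventionally mounted at
 * /sys/fs/fuse/connections/<dev-id>/), which ends up calling
 * fuse_abort_conn().  abort_fuse_conn() and ctl_dir are placeholders.
 *
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int abort_fuse_conn(const char *ctl_dir)
 *	{
 *		char path[256];
 *		int fd, ret;
 *
 *		snprintf(path, sizeof(path), "%s/abort", ctl_dir);
 *		fd = open(path, O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, "1", 1) == 1 ? 0 : -1;
 *		close(fd);
 *		return ret;
 *	}
 */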
2214
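/*
 * Wait for all requests still accounted in num_waiting to be released
 * after an abort.
 */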
2215 void fuse_wait_aborted(struct fuse_conn *fc)
2216 {
2217         wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2218 }
2219
2220 int fuse_dev_release(struct inode *inode, struct file *file)
2221 {
2222         struct fuse_dev *fud = fuse_get_dev(file);
2223
2224         if (fud) {
2225                 struct fuse_conn *fc = fud->fc;
2226                 struct fuse_pqueue *fpq = &fud->pq;
2227                 LIST_HEAD(to_end);
2228
2229                 spin_lock(&fpq->lock);
2230                 WARN_ON(!list_empty(&fpq->io));
2231                 list_splice_init(&fpq->processing, &to_end);
2232                 spin_unlock(&fpq->lock);
2233
2234                 end_requests(fc, &to_end);
2235
2236                 /* Are we the last open device? */
2237                 if (atomic_dec_and_test(&fc->dev_count)) {
2238                         WARN_ON(fc->iq.fasync != NULL);
2239                         fuse_abort_conn(fc);
2240                 }
2241                 fuse_dev_free(fud);
2242         }
2243         return 0;
2244 }
2245 EXPORT_SYMBOL_GPL(fuse_dev_release);
2246
2247 static int fuse_dev_fasync(int fd, struct file *file, int on)
2248 {
2249         struct fuse_dev *fud = fuse_get_dev(file);
2250
2251         if (!fud)
2252                 return -EPERM;
2253
2254         /* No locking - fasync_helper does its own locking */
2255         return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2256 }
2257
2258 static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2259 {
2260         struct fuse_dev *fud;
2261
2262         if (new->private_data)
2263                 return -EINVAL;
2264
2265         fud = fuse_dev_alloc(fc);
2266         if (!fud)
2267                 return -ENOMEM;
2268
2269         new->private_data = fud;
2270         atomic_inc(&fc->dev_count);
2271
2272         return 0;
2273 }
2274
2275 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2276                            unsigned long arg)
2277 {
2278         int err = -ENOTTY;
2279
2280         if (cmd == FUSE_DEV_IOC_CLONE) {
2281                 int oldfd;
2282
2283                 err = -EFAULT;
2284                 if (!get_user(oldfd, (__u32 __user *) arg)) {
2285                         struct file *old = fget(oldfd);
2286
2287                         err = -EINVAL;
2288                         if (old) {
2289                                 struct fuse_dev *fud = NULL;
2290
2291                                 /*
2292                                  * Check against file->f_op because CUSE
2293                                  * uses the same ioctl handler.
2294                                  */
2295                                 if (old->f_op == file->f_op &&
2296                                     old->f_cred->user_ns == file->f_cred->user_ns)
2297                                         fud = fuse_get_dev(old);
2298
2299                                 if (fud) {
2300                                         mutex_lock(&fuse_mutex);
2301                                         err = fuse_device_clone(fud->fc, file);
2302                                         mutex_unlock(&fuse_mutex);
2303                                 }
2304                                 fput(old);
2305                         }
2306                 }
2307         }
2308         return err;
2309 }
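
/*
 * Example (userspace sketch, not part of this kernel file): cloning a
 * /dev/fuse fd so that several threads can each service requests on
 * their own device instance.  The ioctl takes a pointer to the fd of an
 * already set-up fuse device; clone_fuse_fd() and session_fd are
 * placeholders.
 *
 *	#include <stdint.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fuse.h>
 *
 *	static int clone_fuse_fd(int session_fd)
 *	{
 *		uint32_t oldfd = session_fd;
 *		int newfd = open("/dev/fuse", O_RDWR | O_CLOEXEC);
 *
 *		if (newfd < 0)
 *			return -1;
 *		if (ioctl(newfd, FUSE_DEV_IOC_CLONE, &oldfd) == -1) {
 *			close(newfd);
 *			return -1;
 *		}
 *		return newfd;
 *	}
 */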
2310
2311 const struct file_operations fuse_dev_operations = {
2312         .owner          = THIS_MODULE,
2313         .open           = fuse_dev_open,
2314         .llseek         = no_llseek,
2315         .read_iter      = fuse_dev_read,
2316         .splice_read    = fuse_dev_splice_read,
2317         .write_iter     = fuse_dev_write,
2318         .splice_write   = fuse_dev_splice_write,
2319         .poll           = fuse_dev_poll,
2320         .release        = fuse_dev_release,
2321         .fasync         = fuse_dev_fasync,
2322         .unlocked_ioctl = fuse_dev_ioctl,
2323         .compat_ioctl   = fuse_dev_ioctl,
2324 };
2325 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2326
2327 static struct miscdevice fuse_miscdevice = {
2328         .minor = FUSE_MINOR,
2329         .name  = "fuse",
2330         .fops = &fuse_dev_operations,
2331 };
2332
2333 int __init fuse_dev_init(void)
2334 {
2335         int err = -ENOMEM;
2336         fuse_req_cachep = kmem_cache_create("fuse_request",
2337                                             sizeof(struct fuse_req),
2338                                             0, 0, NULL);
2339         if (!fuse_req_cachep)
2340                 goto out;
2341
2342         err = misc_register(&fuse_miscdevice);
2343         if (err)
2344                 goto out_cache_clean;
2345
2346         return 0;
2347
2348  out_cache_clean:
2349         kmem_cache_destroy(fuse_req_cachep);
2350  out:
2351         return err;
2352 }
2353
2354 void fuse_dev_cleanup(void)
2355 {
2356         misc_deregister(&fuse_miscdevice);
2357         kmem_cache_destroy(fuse_req_cachep);
2358 }