// SPDX-License-Identifier: LGPL-2.1
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark as invalid all open files on tree connections, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}
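
/*
 * Reconnect walk-through (as implemented above): the tcon must be in
 * TID_NEED_RECON with its session in SES_GOOD before we proceed; it is
 * moved to TID_IN_FILES_INVALIDATE while the handles are being marked
 * invalid, and finally to TID_NEED_TCON so that the tree connection
 * itself gets reconnected.
 */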

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
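
/*
 * Illustrative example: an open with O_WRONLY | O_CREAT | O_EXCL maps,
 * per the function above, to SMB_O_WRONLY | SMB_O_CREAT | SMB_O_EXCL.
 */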

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *  O_SYNC is a reasonable match to the CIFS writethrough flag,
 *  and the read/write flags match reasonably.  O_LARGEFILE
 *  is irrelevant because largefile support is always used
 *  by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *  O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/
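
/*
 * Worked example of the two mappings (derived from the table above and
 * from cifs_convert_flags()): an open with O_RDWR | O_CREAT | O_TRUNC
 * yields desired_access = GENERIC_READ | GENERIC_WRITE and
 * disposition = FILE_OVERWRITE_IF.
 */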

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc)
                return rc;

        /* TODO: Add support for calling posix query info by passing in the fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

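/*
 * Return true if any byte-range (mandatory) locks are currently recorded
 * for this inode across all of its open file handles.
 */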
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

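/*
 * Acquire @sem for writing by polling down_write_trylock() with a short
 * sleep between attempts, rather than blocking in down_write().
 */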
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}
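
/*
 * Typical call sequence (see cifs_open() below): the caller obtains the
 * lease key, registers a cifs_pending_open, opens the file on the
 * server, and only then attaches the handle here; any oplock that
 * arrived for the pending open is transferred to the new cifsFileInfo.
 */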

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, offload the final release of @cifs_file to a
 *              workqueue instead of doing it in the caller's context
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here; we can never tell whether the caller already holds
         * the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally, we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we can not go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work queues new work.
                                 * So, increase the ref count to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
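
/*
 * Design note: rather than closing the handle on the server right away,
 * cifs_close() above queues a deferred close (bounded by the closetimeo
 * mount option). If the file is reopened with the same flags before the
 * work runs, cifs_open() reuses the still-open handle found via
 * cifs_get_readable_path() and cancels the deferred close.
 */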

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}
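
/*
 * Note the two-phase pattern above: invalid handles are collected (with
 * an extra reference) under tcon->open_file_lock, and only reopened
 * after the spinlock is dropped, since cifs_reopen_file() performs
 * network I/O and may sleep.
 */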

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (0) - lock op, CIFS_READ_OP (1) - read, CIFS_WRITE_OP (2) - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
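
/*
 * Sizing sketch for the batching above: each LOCKING_ANDX request
 * carries at most
 *
 *   max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE)
 *
 * ranges (with max_buf first clamped to PAGE_SIZE), so the lock list is
 * flushed to the server in batches of up to max_num entries per lock type.
 */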
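/*
 * POSIX byte-range locks are keyed on the wire by a pid; local owners
 * are file_lock owner pointers, so hash the owner (mixed with the
 * cifs_lock_secret value) into a stable 32-bit token to use as that pid.
 */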
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

1409 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1410 static int
1411 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1412 {
1413         struct inode *inode = d_inode(cfile->dentry);
1414         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1415         struct file_lock *flock;
1416         struct file_lock_context *flctx = locks_inode_context(inode);
1417         unsigned int count = 0, i;
1418         int rc = 0, xid, type;
1419         struct list_head locks_to_send, *el;
1420         struct lock_to_push *lck, *tmp;
1421         __u64 length;
1422
1423         xid = get_xid();
1424
1425         if (!flctx)
1426                 goto out;
1427
1428         spin_lock(&flctx->flc_lock);
1429         list_for_each(el, &flctx->flc_posix) {
1430                 count++;
1431         }
1432         spin_unlock(&flctx->flc_lock);
1433
1434         INIT_LIST_HEAD(&locks_to_send);
1435
1436         /*
1437          * Allocating count locks is enough because no FL_POSIX locks can be
1438          * added to the list while we are holding cinode->lock_sem, which
1439          * protects the locking operations on this inode.
1440          */
1441         for (i = 0; i < count; i++) {
1442                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1443                 if (!lck) {
1444                         rc = -ENOMEM;
1445                         goto err_out;
1446                 }
1447                 list_add_tail(&lck->llist, &locks_to_send);
1448         }
1449
1450         el = locks_to_send.next;
1451         spin_lock(&flctx->flc_lock);
1452         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1453                 if (el == &locks_to_send) {
1454                         /*
1455                          * The list ended. We don't have enough allocated
1456                          * structures - something is really wrong.
1457                          */
1458                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1459                         break;
1460                 }
1461                 length = cifs_flock_len(flock);
1462                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1463                         type = CIFS_RDLCK;
1464                 else
1465                         type = CIFS_WRLCK;
1466                 lck = list_entry(el, struct lock_to_push, llist);
1467                 lck->pid = hash_lockowner(flock->fl_owner);
1468                 lck->netfid = cfile->fid.netfid;
1469                 lck->length = length;
1470                 lck->type = type;
1471                 lck->offset = flock->fl_start;
1472         }
1473         spin_unlock(&flctx->flc_lock);
1474
1475         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1476                 int stored_rc;
1477
1478                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1479                                              lck->offset, lck->length, NULL,
1480                                              lck->type, 0);
1481                 if (stored_rc)
1482                         rc = stored_rc;
1483                 list_del(&lck->llist);
1484                 kfree(lck);
1485         }
1486
1487 out:
1488         free_xid(xid);
1489         return rc;
1490 err_out:
1491         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1492                 list_del(&lck->llist);
1493                 kfree(lck);
1494         }
1495         goto out;
1496 }
1497 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1498
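/*
 * Push cached byte-range locks to the server: POSIX-style if the server
 * speaks the Unix extensions with the FCNTL capability and the mount did
 * not set CIFS_MOUNT_NOPOSIXBRL, mandatory-style via the server ops
 * otherwise.
 */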
1499 static int
1500 cifs_push_locks(struct cifsFileInfo *cfile)
1501 {
1502         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1503         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1504         int rc = 0;
1505 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1506         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1507 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1508
1509         /* we are going to update can_cache_brlcks here - need write access */
1510         cifs_down_write(&cinode->lock_sem);
1511         if (!cinode->can_cache_brlcks) {
1512                 up_write(&cinode->lock_sem);
1513                 return rc;
1514         }
1515
1516 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1517         if (cap_unix(tcon->ses) &&
1518             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1519             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1520                 rc = cifs_push_posix_locks(cfile);
1521         else
1522 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1523                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1524
1525         cinode->can_cache_brlcks = false;
1526         up_write(&cinode->lock_sem);
1527         return rc;
1528 }
1529
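/*
 * Decode a VFS file_lock into protocol terms: set *type to the server's
 * large-file lock type plus the shared/exclusive/unlock bits, and report
 * via *lock, *unlock and *wait_flag what kind of request this is.
 */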
1530 static void
1531 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1532                 bool *wait_flag, struct TCP_Server_Info *server)
1533 {
1534         if (flock->fl_flags & FL_POSIX)
1535                 cifs_dbg(FYI, "Posix\n");
1536         if (flock->fl_flags & FL_FLOCK)
1537                 cifs_dbg(FYI, "Flock\n");
1538         if (flock->fl_flags & FL_SLEEP) {
1539                 cifs_dbg(FYI, "Blocking lock\n");
1540                 *wait_flag = true;
1541         }
1542         if (flock->fl_flags & FL_ACCESS)
1543                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1544         if (flock->fl_flags & FL_LEASE)
1545                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1546         if (flock->fl_flags &
1547             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1548                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1549                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1550
1551         *type = server->vals->large_lock_type;
1552         if (flock->fl_type == F_WRLCK) {
1553                 cifs_dbg(FYI, "F_WRLCK\n");
1554                 *type |= server->vals->exclusive_lock_type;
1555                 *lock = 1;
1556         } else if (flock->fl_type == F_UNLCK) {
1557                 cifs_dbg(FYI, "F_UNLCK\n");
1558                 *type |= server->vals->unlock_lock_type;
1559                 *unlock = 1;
1560                 /* Check if unlock includes more than one lock range */
1561         } else if (flock->fl_type == F_RDLCK) {
1562                 cifs_dbg(FYI, "F_RDLCK\n");
1563                 *type |= server->vals->shared_lock_type;
1564                 *lock = 1;
1565         } else if (flock->fl_type == F_EXLCK) {
1566                 cifs_dbg(FYI, "F_EXLCK\n");
1567                 *type |= server->vals->exclusive_lock_type;
1568                 *lock = 1;
1569         } else if (flock->fl_type == F_SHLCK) {
1570                 cifs_dbg(FYI, "F_SHLCK\n");
1571                 *type |= server->vals->shared_lock_type;
1572                 *lock = 1;
1573         } else
1574                 cifs_dbg(FYI, "Unknown type of lock\n");
1575 }
1576
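/*
 * Service F_GETLK.  The cached lock list is consulted first; if that is
 * inconclusive, the range is probed on the server: take the lock and drop
 * it again to prove the range is free, and if an exclusive probe fails,
 * retry shared to report whether a read or a write lock would conflict.
 * This is a best-effort emulation, not an atomic server-side test.
 */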
1577 static int
1578 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1579            bool wait_flag, bool posix_lck, unsigned int xid)
1580 {
1581         int rc = 0;
1582         __u64 length = cifs_flock_len(flock);
1583         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1584         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1585         struct TCP_Server_Info *server = tcon->ses->server;
1586 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1587         __u16 netfid = cfile->fid.netfid;
1588
1589         if (posix_lck) {
1590                 int posix_lock_type;
1591
1592                 rc = cifs_posix_lock_test(file, flock);
1593                 if (!rc)
1594                         return rc;
1595
1596                 if (type & server->vals->shared_lock_type)
1597                         posix_lock_type = CIFS_RDLCK;
1598                 else
1599                         posix_lock_type = CIFS_WRLCK;
1600                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1601                                       hash_lockowner(flock->fl_owner),
1602                                       flock->fl_start, length, flock,
1603                                       posix_lock_type, wait_flag);
1604                 return rc;
1605         }
1606 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1607
1608         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1609         if (!rc)
1610                 return rc;
1611
1612         /* BB we could chain these into one lock request BB */
1613         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1614                                     1, 0, false);
1615         if (rc == 0) {
1616                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1617                                             type, 0, 1, false);
1618                 flock->fl_type = F_UNLCK;
1619                 if (rc != 0)
1620                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1621                                  rc);
1622                 return 0;
1623         }
1624
1625         if (type & server->vals->shared_lock_type) {
1626                 flock->fl_type = F_WRLCK;
1627                 return 0;
1628         }
1629
1630         type &= ~server->vals->exclusive_lock_type;
1631
1632         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1633                                     type | server->vals->shared_lock_type,
1634                                     1, 0, false);
1635         if (rc == 0) {
1636                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637                         type | server->vals->shared_lock_type, 0, 1, false);
1638                 flock->fl_type = F_RDLCK;
1639                 if (rc != 0)
1640                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1641                                  rc);
1642         } else
1643                 flock->fl_type = F_WRLCK;
1644
1645         return 0;
1646 }
1647
1648 void
1649 cifs_move_llist(struct list_head *source, struct list_head *dest)
1650 {
1651         struct list_head *li, *tmp;
1652         list_for_each_safe(li, tmp, source)
1653                 list_move(li, dest);
1654 }
1655
1656 void
1657 cifs_free_llist(struct list_head *llist)
1658 {
1659         struct cifsLockInfo *li, *tmp;
1660         list_for_each_entry_safe(li, tmp, llist, llist) {
1661                 cifs_del_lock_waiters(li);
1662                 list_del(&li->llist);
1663                 kfree(li);
1664         }
1665 }
1666
1667 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
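/*
 * Unlock every cached range that falls entirely inside
 * [fl_start, fl_start + len).  Matching entries are batched into
 * LOCKING_ANDX_RANGE arrays and parked on tmp_llist so they can be
 * re-added to the file's list if the server rejects the unlock request.
 */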
1668 int
1669 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1670                   unsigned int xid)
1671 {
1672         int rc = 0, stored_rc;
1673         static const int types[] = {
1674                 LOCKING_ANDX_LARGE_FILES,
1675                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1676         };
1677         unsigned int i;
1678         unsigned int max_num, num, max_buf;
1679         LOCKING_ANDX_RANGE *buf, *cur;
1680         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1681         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1682         struct cifsLockInfo *li, *tmp;
1683         __u64 length = cifs_flock_len(flock);
1684         struct list_head tmp_llist;
1685
1686         INIT_LIST_HEAD(&tmp_llist);
1687
1688         /*
1689          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1690          * and check it before using.
1691          */
1692         max_buf = tcon->ses->server->maxBuf;
1693         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1694                 return -EINVAL;
1695
1696         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1697                      PAGE_SIZE);
1698         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1699                         PAGE_SIZE);
1700         max_num = (max_buf - sizeof(struct smb_hdr)) /
1701                                                 sizeof(LOCKING_ANDX_RANGE);
1702         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1703         if (!buf)
1704                 return -ENOMEM;
1705
1706         cifs_down_write(&cinode->lock_sem);
1707         for (i = 0; i < 2; i++) {
1708                 cur = buf;
1709                 num = 0;
1710                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1711                         if (flock->fl_start > li->offset ||
1712                             (flock->fl_start + length) <
1713                             (li->offset + li->length))
1714                                 continue;
1715                         if (current->tgid != li->pid)
1716                                 continue;
1717                         if (types[i] != li->type)
1718                                 continue;
1719                         if (cinode->can_cache_brlcks) {
1720                                 /*
1721                                  * We can cache brlock requests - simply remove
1722                                  * a lock from the file's list.
1723                                  */
1724                                 list_del(&li->llist);
1725                                 cifs_del_lock_waiters(li);
1726                                 kfree(li);
1727                                 continue;
1728                         }
1729                         cur->Pid = cpu_to_le16(li->pid);
1730                         cur->LengthLow = cpu_to_le32((u32)li->length);
1731                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1732                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1733                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1734                         /*
1735                          * We need to save a lock here to let us add it again to
1736                          * the file's list if the unlock range request fails on
1737                          * the server.
1738                          */
1739                         list_move(&li->llist, &tmp_llist);
1740                         if (++num == max_num) {
1741                                 stored_rc = cifs_lockv(xid, tcon,
1742                                                        cfile->fid.netfid,
1743                                                        li->type, num, 0, buf);
1744                                 if (stored_rc) {
1745                                         /*
1746                                          * We failed on the unlock range
1747                                          * request - add all locks from the tmp
1748                                          * list to the head of the file's list.
1749                                          */
1750                                         cifs_move_llist(&tmp_llist,
1751                                                         &cfile->llist->locks);
1752                                         rc = stored_rc;
1753                                 } else
1754                                         /*
1755                                          * The unlock range request succeeded -
1756                                          * free the tmp list.
1757                                          */
1758                                         cifs_free_llist(&tmp_llist);
1759                                 cur = buf;
1760                                 num = 0;
1761                         } else
1762                                 cur++;
1763                 }
1764                 if (num) {
1765                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1766                                                types[i], num, 0, buf);
1767                         if (stored_rc) {
1768                                 cifs_move_llist(&tmp_llist,
1769                                                 &cfile->llist->locks);
1770                                 rc = stored_rc;
1771                         } else
1772                                 cifs_free_llist(&tmp_llist);
1773                 }
1774         }
1775
1776         up_write(&cinode->lock_sem);
1777         kfree(buf);
1778         return rc;
1779 }
1780 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1781
1782 static int
1783 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1784            bool wait_flag, bool posix_lck, int lock, int unlock,
1785            unsigned int xid)
1786 {
1787         int rc = 0;
1788         __u64 length = cifs_flock_len(flock);
1789         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1790         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1791         struct TCP_Server_Info *server = tcon->ses->server;
1792         struct inode *inode = d_inode(cfile->dentry);
1793
1794 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1795         if (posix_lck) {
1796                 int posix_lock_type;
1797
1798                 rc = cifs_posix_lock_set(file, flock);
1799                 if (rc <= FILE_LOCK_DEFERRED)
1800                         return rc;
1801
1802                 if (type & server->vals->shared_lock_type)
1803                         posix_lock_type = CIFS_RDLCK;
1804                 else
1805                         posix_lock_type = CIFS_WRLCK;
1806
1807                 if (unlock == 1)
1808                         posix_lock_type = CIFS_UNLCK;
1809
1810                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1811                                       hash_lockowner(flock->fl_owner),
1812                                       flock->fl_start, length,
1813                                       NULL, posix_lock_type, wait_flag);
1814                 goto out;
1815         }
1816 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1817         if (lock) {
1818                 struct cifsLockInfo *lock;
1819
1820                 lock = cifs_lock_init(flock->fl_start, length, type,
1821                                       flock->fl_flags);
1822                 if (!lock)
1823                         return -ENOMEM;
1824
1825                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1826                 if (rc < 0) {
1827                         kfree(lock);
1828                         return rc;
1829                 }
1830                 if (!rc)
1831                         goto out;
1832
1833                 /*
1834                  * A Windows 7 server can delay breaking a lease from read to None
1835                  * if we set a byte-range lock on a file - break it explicitly
1836                  * before sending the lock to the server to be sure the next
1837                  * read won't conflict with non-overlapping locks due to
1838                  * page reading.
1839                  */
1840                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1841                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1842                         cifs_zap_mapping(inode);
1843                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1844                                  inode);
1845                         CIFS_I(inode)->oplock = 0;
1846                 }
1847
1848                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1849                                             type, 1, 0, wait_flag);
1850                 if (rc) {
1851                         kfree(lock);
1852                         return rc;
1853                 }
1854
1855                 cifs_lock_add(cfile, lock);
1856         } else if (unlock)
1857                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1858
1859 out:
1860         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1861                 /*
1862                  * If this is a request to remove all locks because we
1863                  * are closing the file, it doesn't matter if the
1864                  * unlocking failed as both cifs.ko and the SMB server
1865                  * remove the lock on file close
1866                  */
1867                 if (rc) {
1868                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1869                         if (!(flock->fl_flags & FL_CLOSE))
1870                                 return rc;
1871                 }
1872                 rc = locks_lock_file_wait(file, flock);
1873         }
1874         return rc;
1875 }
1876
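/*
 * flock(2) entry point.  A minimal userspace sketch that lands here
 * (illustrative only, not part of this file):
 *
 *	int fd = open("/mnt/cifs/file", O_RDWR);
 *	flock(fd, LOCK_EX);	// whole-file exclusive lock
 *	flock(fd, LOCK_UN);	// release it
 */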
1877 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1878 {
1879         int rc, xid;
1880         int lock = 0, unlock = 0;
1881         bool wait_flag = false;
1882         bool posix_lck = false;
1883         struct cifs_sb_info *cifs_sb;
1884         struct cifs_tcon *tcon;
1885         struct cifsFileInfo *cfile;
1886         __u32 type;
1887
1888         xid = get_xid();
1889
1890         if (!(fl->fl_flags & FL_FLOCK)) {
1891                 rc = -ENOLCK;
1892                 free_xid(xid);
1893                 return rc;
1894         }
1895
1896         cfile = (struct cifsFileInfo *)file->private_data;
1897         tcon = tlink_tcon(cfile->tlink);
1898
1899         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1900                         tcon->ses->server);
1901         cifs_sb = CIFS_FILE_SB(file);
1902
1903         if (cap_unix(tcon->ses) &&
1904             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1905             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1906                 posix_lck = true;
1907
1908         if (!lock && !unlock) {
1909                 /*
1910                  * if this is neither a lock nor an unlock request, there is
1911                  * nothing to do since we do not know what was asked for
1912                  */
1913                 rc = -EOPNOTSUPP;
1914                 free_xid(xid);
1915                 return rc;
1916         }
1917
1918         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1919                         xid);
1920         free_xid(xid);
1921         return rc;
1924 }
1925
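/*
 * fcntl(2) byte-range lock entry point.  Illustrative userspace sketch
 * (assumed usage, not part of this file):
 *
 *	struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET,
 *			    .l_start = 0, .l_len = 4096 };
 *	fcntl(fd, F_SETLKW, &fl);	// blocks until the range is ours
 */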
1926 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1927 {
1928         int rc, xid;
1929         int lock = 0, unlock = 0;
1930         bool wait_flag = false;
1931         bool posix_lck = false;
1932         struct cifs_sb_info *cifs_sb;
1933         struct cifs_tcon *tcon;
1934         struct cifsFileInfo *cfile;
1935         __u32 type;
1936
1937         rc = -EACCES;
1938         xid = get_xid();
1939
1940         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
1941                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
1942                  (long long)flock->fl_end);
1943
1944         cfile = (struct cifsFileInfo *)file->private_data;
1945         tcon = tlink_tcon(cfile->tlink);
1946
1947         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1948                         tcon->ses->server);
1949         cifs_sb = CIFS_FILE_SB(file);
1950         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1951
1952         if (cap_unix(tcon->ses) &&
1953             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1954             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1955                 posix_lck = true;
1956         /*
1957          * BB add code here to normalize offset and length to account for
1958          * negative length, which we cannot accept over the wire.
1959          */
1960         if (IS_GETLK(cmd)) {
1961                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1962                 free_xid(xid);
1963                 return rc;
1964         }
1965
1966         if (!lock && !unlock) {
1967                 /*
1968                  * if this is neither a lock nor an unlock request, there is
1969                  * nothing to do since we do not know what was asked for
1970                  */
1971                 free_xid(xid);
1972                 return -EOPNOTSUPP;
1973         }
1974
1975         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1976                         xid);
1977         free_xid(xid);
1978         return rc;
1979 }
1980
1981 /*
1982  * update the file size (if needed) after a write. Should be called with
1983  * the inode->i_lock held
1984  */
1985 void
1986 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1987                       unsigned int bytes_written)
1988 {
1989         loff_t end_of_write = offset + bytes_written;
1990
1991         if (end_of_write > cifsi->server_eof)
1992                 cifsi->server_eof = end_of_write;
1993 }
1994
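/*
 * Synchronous write helper: loops until write_size bytes are written,
 * reopening an invalidated handle and retrying on -EAGAIN, chunking each
 * request to the server's wp_retry_size(), and advancing *offset and the
 * cached EOF as data goes out.
 */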
1995 static ssize_t
1996 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1997            size_t write_size, loff_t *offset)
1998 {
1999         int rc = 0;
2000         unsigned int bytes_written = 0;
2001         unsigned int total_written;
2002         struct cifs_tcon *tcon;
2003         struct TCP_Server_Info *server;
2004         unsigned int xid;
2005         struct dentry *dentry = open_file->dentry;
2006         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2007         struct cifs_io_parms io_parms = {0};
2008
2009         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2010                  write_size, *offset, dentry);
2011
2012         tcon = tlink_tcon(open_file->tlink);
2013         server = tcon->ses->server;
2014
2015         if (!server->ops->sync_write)
2016                 return -ENOSYS;
2017
2018         xid = get_xid();
2019
2020         for (total_written = 0; write_size > total_written;
2021              total_written += bytes_written) {
2022                 rc = -EAGAIN;
2023                 while (rc == -EAGAIN) {
2024                         struct kvec iov[2];
2025                         unsigned int len;
2026
2027                         if (open_file->invalidHandle) {
2028                                 /* we could deadlock if we called
2029                                    filemap_fdatawait from here so tell
2030                                    reopen_file not to flush data to the
2031                                    server now */
2032                                 rc = cifs_reopen_file(open_file, false);
2033                                 if (rc != 0)
2034                                         break;
2035                         }
2036
2037                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2038                                   (unsigned int)write_size - total_written);
2039                         /* iov[0] is reserved for smb header */
2040                         iov[1].iov_base = (char *)write_data + total_written;
2041                         iov[1].iov_len = len;
2042                         io_parms.pid = pid;
2043                         io_parms.tcon = tcon;
2044                         io_parms.offset = *offset;
2045                         io_parms.length = len;
2046                         rc = server->ops->sync_write(xid, &open_file->fid,
2047                                         &io_parms, &bytes_written, iov, 1);
2048                 }
2049                 if (rc || (bytes_written == 0)) {
2050                         if (total_written)
2051                                 break;
2052                         else {
2053                                 free_xid(xid);
2054                                 return rc;
2055                         }
2056                 } else {
2057                         spin_lock(&d_inode(dentry)->i_lock);
2058                         cifs_update_eof(cifsi, *offset, bytes_written);
2059                         spin_unlock(&d_inode(dentry)->i_lock);
2060                         *offset += bytes_written;
2061                 }
2062         }
2063
2064         cifs_stats_bytes_written(tcon, total_written);
2065
2066         if (total_written > 0) {
2067                 spin_lock(&d_inode(dentry)->i_lock);
2068                 if (*offset > d_inode(dentry)->i_size) {
2069                         i_size_write(d_inode(dentry), *offset);
2070                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2071                 }
2072                 spin_unlock(&d_inode(dentry)->i_lock);
2073         }
2074         mark_inode_dirty_sync(d_inode(dentry));
2075         free_xid(xid);
2076         return total_written;
2077 }
2078
2079 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2080                                         bool fsuid_only)
2081 {
2082         struct cifsFileInfo *open_file = NULL;
2083         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2084
2085         /* only filter by fsuid on multiuser mounts */
2086         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2087                 fsuid_only = false;
2088
2089         spin_lock(&cifs_inode->open_file_lock);
2090         /* we could simply take the first list entry since write-only entries
2091            are always at the end of the list, but since the first entry might
2092            have a close pending, we go through the whole list */
2093         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2094                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2095                         continue;
2096                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2097                         if (!open_file->invalidHandle) {
2098                                 /* found a good file */
2099                                 /* lock it so it will not be closed on us */
2100                                 cifsFileInfo_get(open_file);
2101                                 spin_unlock(&cifs_inode->open_file_lock);
2102                                 return open_file;
2103                         } /* else might as well continue, and look for
2104                              another, or simply have the caller reopen it
2105                              again rather than trying to fix this handle */
2106                 } else /* write only file */
2107                         break; /* write only files are last so must be done */
2108         }
2109         spin_unlock(&cifs_inode->open_file_lock);
2110         return NULL;
2111 }
2112
2113 /* Return -EBADF if no handle is found, and a general rc otherwise */
2114 int
2115 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2116                        struct cifsFileInfo **ret_file)
2117 {
2118         struct cifsFileInfo *open_file, *inv_file = NULL;
2119         struct cifs_sb_info *cifs_sb;
2120         bool any_available = false;
2121         int rc = -EBADF;
2122         unsigned int refind = 0;
2123         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2124         bool with_delete = flags & FIND_WR_WITH_DELETE;
2125         *ret_file = NULL;
2126
2127         /*
2128          * Having a null inode here (because mapping->host was set to zero by
2129          * the VFS or MM) should not happen but we had reports of an oops (due
2130          * to it being zero) during stress testcases so we need to check for it
2131          */
2132
2133         if (cifs_inode == NULL) {
2134                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2135                 dump_stack();
2136                 return rc;
2137         }
2138
2139         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2140
2141         /* only filter by fsuid on multiuser mounts */
2142         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2143                 fsuid_only = false;
2144
2145         spin_lock(&cifs_inode->open_file_lock);
2146 refind_writable:
2147         if (refind > MAX_REOPEN_ATT) {
2148                 spin_unlock(&cifs_inode->open_file_lock);
2149                 return rc;
2150         }
2151         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2152                 if (!any_available && open_file->pid != current->tgid)
2153                         continue;
2154                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2155                         continue;
2156                 if (with_delete && !(open_file->fid.access & DELETE))
2157                         continue;
2158                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2159                         if (!open_file->invalidHandle) {
2160                                 /* found a good writable file */
2161                                 cifsFileInfo_get(open_file);
2162                                 spin_unlock(&cifs_inode->open_file_lock);
2163                                 *ret_file = open_file;
2164                                 return 0;
2165                         } else {
2166                                 if (!inv_file)
2167                                         inv_file = open_file;
2168                         }
2169                 }
2170         }
2171         /* couldn't find a usable FH with the same pid, try any available */
2172         if (!any_available) {
2173                 any_available = true;
2174                 goto refind_writable;
2175         }
2176
2177         if (inv_file) {
2178                 any_available = false;
2179                 cifsFileInfo_get(inv_file);
2180         }
2181
2182         spin_unlock(&cifs_inode->open_file_lock);
2183
2184         if (inv_file) {
2185                 rc = cifs_reopen_file(inv_file, false);
2186                 if (!rc) {
2187                         *ret_file = inv_file;
2188                         return 0;
2189                 }
2190
2191                 spin_lock(&cifs_inode->open_file_lock);
2192                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2193                 spin_unlock(&cifs_inode->open_file_lock);
2194                 cifsFileInfo_put(inv_file);
2195                 ++refind;
2196                 inv_file = NULL;
2197                 spin_lock(&cifs_inode->open_file_lock);
2198                 goto refind_writable;
2199         }
2200
2201         return rc;
2202 }
2203
2204 struct cifsFileInfo *
2205 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2206 {
2207         struct cifsFileInfo *cfile;
2208         int rc;
2209
2210         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2211         if (rc)
2212                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2213
2214         return cfile;
2215 }
2216
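/*
 * Look up an open, writable handle by path name: walk the tcon's open
 * file list, rebuild each dentry's path, and defer to
 * cifs_get_writable_file() on the first match.
 */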
2217 int
2218 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2219                        int flags,
2220                        struct cifsFileInfo **ret_file)
2221 {
2222         struct cifsFileInfo *cfile;
2223         void *page = alloc_dentry_path();
2224
2225         *ret_file = NULL;
2226
2227         spin_lock(&tcon->open_file_lock);
2228         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2229                 struct cifsInodeInfo *cinode;
2230                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2231                 if (IS_ERR(full_path)) {
2232                         spin_unlock(&tcon->open_file_lock);
2233                         free_dentry_path(page);
2234                         return PTR_ERR(full_path);
2235                 }
2236                 if (strcmp(full_path, name))
2237                         continue;
2238
2239                 cinode = CIFS_I(d_inode(cfile->dentry));
2240                 spin_unlock(&tcon->open_file_lock);
2241                 free_dentry_path(page);
2242                 return cifs_get_writable_file(cinode, flags, ret_file);
2243         }
2244
2245         spin_unlock(&tcon->open_file_lock);
2246         free_dentry_path(page);
2247         return -ENOENT;
2248 }
2249
2250 int
2251 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2252                        struct cifsFileInfo **ret_file)
2253 {
2254         struct cifsFileInfo *cfile;
2255         void *page = alloc_dentry_path();
2256
2257         *ret_file = NULL;
2258
2259         spin_lock(&tcon->open_file_lock);
2260         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2261                 struct cifsInodeInfo *cinode;
2262                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2263                 if (IS_ERR(full_path)) {
2264                         spin_unlock(&tcon->open_file_lock);
2265                         free_dentry_path(page);
2266                         return PTR_ERR(full_path);
2267                 }
2268                 if (strcmp(full_path, name))
2269                         continue;
2270
2271                 cinode = CIFS_I(d_inode(cfile->dentry));
2272                 spin_unlock(&tcon->open_file_lock);
2273                 free_dentry_path(page);
2274                 *ret_file = find_readable_file(cinode, 0);
2275                 return *ret_file ? 0 : -ENOENT;
2276         }
2277
2278         spin_unlock(&tcon->open_file_lock);
2279         free_dentry_path(page);
2280         return -ENOENT;
2281 }
2282
2283 void
2284 cifs_writedata_release(struct kref *refcount)
2285 {
2286         struct cifs_writedata *wdata = container_of(refcount,
2287                                         struct cifs_writedata, refcount);
2288 #ifdef CONFIG_CIFS_SMB_DIRECT
2289         if (wdata->mr) {
2290                 smbd_deregister_mr(wdata->mr);
2291                 wdata->mr = NULL;
2292         }
2293 #endif
2294
2295         if (wdata->cfile)
2296                 cifsFileInfo_put(wdata->cfile);
2297
2298         kvfree(wdata->pages);
2299         kfree(wdata);
2300 }
2301
2302 /*
2303  * Write failed with a retryable error. Resend the write request. It's also
2304  * possible that the page was redirtied so re-clean the page.
2305  */
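/*
 * The resend below is re-chunked to the server's current wp_retry_size(),
 * since after a reconnect the server may accept smaller writes than the
 * one that originally failed.
 */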
2306 static void
2307 cifs_writev_requeue(struct cifs_writedata *wdata)
2308 {
2309         int i, rc = 0;
2310         struct inode *inode = d_inode(wdata->cfile->dentry);
2311         struct TCP_Server_Info *server;
2312         unsigned int rest_len;
2313
2314         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2315         i = 0;
2316         rest_len = wdata->bytes;
2317         do {
2318                 struct cifs_writedata *wdata2;
2319                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2320
2321                 wsize = server->ops->wp_retry_size(inode);
2322                 if (wsize < rest_len) {
2323                         nr_pages = wsize / PAGE_SIZE;
2324                         if (!nr_pages) {
2325                                 rc = -EOPNOTSUPP;
2326                                 break;
2327                         }
2328                         cur_len = nr_pages * PAGE_SIZE;
2329                         tailsz = PAGE_SIZE;
2330                 } else {
2331                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2332                         cur_len = rest_len;
2333                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2334                 }
2335
2336                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2337                 if (!wdata2) {
2338                         rc = -ENOMEM;
2339                         break;
2340                 }
2341
2342                 for (j = 0; j < nr_pages; j++) {
2343                         wdata2->pages[j] = wdata->pages[i + j];
2344                         lock_page(wdata2->pages[j]);
2345                         clear_page_dirty_for_io(wdata2->pages[j]);
2346                 }
2347
2348                 wdata2->sync_mode = wdata->sync_mode;
2349                 wdata2->nr_pages = nr_pages;
2350                 wdata2->offset = page_offset(wdata2->pages[0]);
2351                 wdata2->pagesz = PAGE_SIZE;
2352                 wdata2->tailsz = tailsz;
2353                 wdata2->bytes = cur_len;
2354
2355                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2356                                             &wdata2->cfile);
2357                 if (!wdata2->cfile) {
2358                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2359                                  rc);
2360                         if (!is_retryable_error(rc))
2361                                 rc = -EBADF;
2362                 } else {
2363                         wdata2->pid = wdata2->cfile->pid;
2364                         rc = server->ops->async_writev(wdata2,
2365                                                        cifs_writedata_release);
2366                 }
2367
2368                 for (j = 0; j < nr_pages; j++) {
2369                         unlock_page(wdata2->pages[j]);
2370                         if (rc != 0 && !is_retryable_error(rc)) {
2371                                 SetPageError(wdata2->pages[j]);
2372                                 end_page_writeback(wdata2->pages[j]);
2373                                 put_page(wdata2->pages[j]);
2374                         }
2375                 }
2376
2377                 kref_put(&wdata2->refcount, cifs_writedata_release);
2378                 if (rc) {
2379                         if (is_retryable_error(rc))
2380                                 continue;
2381                         i += nr_pages;
2382                         break;
2383                 }
2384
2385                 rest_len -= cur_len;
2386                 i += nr_pages;
2387         } while (i < wdata->nr_pages);
2388
2389         /* cleanup remaining pages from the original wdata */
2390         for (; i < wdata->nr_pages; i++) {
2391                 SetPageError(wdata->pages[i]);
2392                 end_page_writeback(wdata->pages[i]);
2393                 put_page(wdata->pages[i]);
2394         }
2395
2396         if (rc != 0 && !is_retryable_error(rc))
2397                 mapping_set_error(inode->i_mapping, rc);
2398         kref_put(&wdata->refcount, cifs_writedata_release);
2399 }
2400
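/*
 * Work-item completion for an async write: on success update the cached
 * EOF and stats; on -EAGAIN under WB_SYNC_ALL requeue the data; otherwise
 * end writeback on each page, redirtying it or flagging an error as
 * appropriate.
 */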
2401 void
2402 cifs_writev_complete(struct work_struct *work)
2403 {
2404         struct cifs_writedata *wdata = container_of(work,
2405                                                 struct cifs_writedata, work);
2406         struct inode *inode = d_inode(wdata->cfile->dentry);
2407         int i = 0;
2408
2409         if (wdata->result == 0) {
2410                 spin_lock(&inode->i_lock);
2411                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2412                 spin_unlock(&inode->i_lock);
2413                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2414                                          wdata->bytes);
2415         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2416                 return cifs_writev_requeue(wdata);
2417
2418         for (i = 0; i < wdata->nr_pages; i++) {
2419                 struct page *page = wdata->pages[i];
2420
2421                 if (wdata->result == -EAGAIN)
2422                         __set_page_dirty_nobuffers(page);
2423                 else if (wdata->result < 0)
2424                         SetPageError(page);
2425                 end_page_writeback(page);
2426                 cifs_readpage_to_fscache(inode, page);
2427                 put_page(page);
2428         }
2429         if (wdata->result != -EAGAIN)
2430                 mapping_set_error(inode->i_mapping, wdata->result);
2431         kref_put(&wdata->refcount, cifs_writedata_release);
2432 }
2433
2434 struct cifs_writedata *
2435 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2436 {
2437         struct cifs_writedata *writedata = NULL;
2438         struct page **pages =
2439                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2440         if (pages) {
2441                 writedata = cifs_writedata_direct_alloc(pages, complete);
2442                 if (!writedata)
2443                         kvfree(pages);
2444         }
2445
2446         return writedata;
2447 }
2448
2449 struct cifs_writedata *
2450 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2451 {
2452         struct cifs_writedata *wdata;
2453
2454         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2455         if (wdata != NULL) {
2456                 wdata->pages = pages;
2457                 kref_init(&wdata->refcount);
2458                 INIT_LIST_HEAD(&wdata->list);
2459                 init_completion(&wdata->done);
2460                 INIT_WORK(&wdata->work, complete);
2461         }
2462         return wdata;
2463 }
2464
2465
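/*
 * Write the byte range [from, to) of a locked page synchronously through
 * any writable handle for the inode, clamping the range so a race with
 * truncate cannot extend the file.
 */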
2466 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2467 {
2468         struct address_space *mapping = page->mapping;
2469         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2470         char *write_data;
2471         int rc = -EFAULT;
2472         int bytes_written = 0;
2473         struct inode *inode;
2474         struct cifsFileInfo *open_file;
2475
2476         if (!mapping || !mapping->host)
2477                 return -EFAULT;
2478
2479         inode = page->mapping->host;
2480
2481         offset += (loff_t)from;
2482         write_data = kmap(page);
2483         write_data += from;
2484
2485         if ((to > PAGE_SIZE) || (from > to)) {
2486                 kunmap(page);
2487                 return -EIO;
2488         }
2489
2490         /* racing with truncate? */
2491         if (offset > mapping->host->i_size) {
2492                 kunmap(page);
2493                 return 0; /* don't care */
2494         }
2495
2496         /* check to make sure that we are not extending the file */
2497         if (mapping->host->i_size - offset < (loff_t)to)
2498                 to = (unsigned)(mapping->host->i_size - offset);
2499
2500         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2501                                     &open_file);
2502         if (!rc) {
2503                 bytes_written = cifs_write(open_file, open_file->pid,
2504                                            write_data, to - from, &offset);
2505                 cifsFileInfo_put(open_file);
2506                 /* Does mm or vfs already set times? */
2507                 inode->i_atime = inode->i_mtime = current_time(inode);
2508                 if ((bytes_written > 0) && (offset))
2509                         rc = 0;
2510                 else if (bytes_written < 0)
2511                         rc = bytes_written;
2512                 else
2513                         rc = -EFAULT;
2514         } else {
2515                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2516                 if (!is_retryable_error(rc))
2517                         rc = -EIO;
2518         }
2519
2520         kunmap(page);
2521         return rc;
2522 }
2523
2524 static struct cifs_writedata *
2525 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2526                           pgoff_t end, pgoff_t *index,
2527                           unsigned int *found_pages)
2528 {
2529         struct cifs_writedata *wdata;
2530
2531         wdata = cifs_writedata_alloc((unsigned int)tofind,
2532                                      cifs_writev_complete);
2533         if (!wdata)
2534                 return NULL;
2535
2536         *found_pages = find_get_pages_range_tag(mapping, index, end,
2537                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2538         return wdata;
2539 }
2540
2541 static unsigned int
2542 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2543                     struct address_space *mapping,
2544                     struct writeback_control *wbc,
2545                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2546 {
2547         unsigned int nr_pages = 0, i;
2548         struct page *page;
2549
2550         for (i = 0; i < found_pages; i++) {
2551                 page = wdata->pages[i];
2552                 /*
2553                  * At this point we hold neither the i_pages lock nor the
2554                  * page lock: the page may be truncated or invalidated
2555                  * (changing page->mapping to NULL), or even swizzled
2556                  * back from swapper_space to tmpfs file mapping
2557                  */
2558
2559                 if (nr_pages == 0)
2560                         lock_page(page);
2561                 else if (!trylock_page(page))
2562                         break;
2563
2564                 if (unlikely(page->mapping != mapping)) {
2565                         unlock_page(page);
2566                         break;
2567                 }
2568
2569                 if (!wbc->range_cyclic && page->index > end) {
2570                         *done = true;
2571                         unlock_page(page);
2572                         break;
2573                 }
2574
2575                 if (*next && (page->index != *next)) {
2576                         /* Not next consecutive page */
2577                         unlock_page(page);
2578                         break;
2579                 }
2580
2581                 if (wbc->sync_mode != WB_SYNC_NONE)
2582                         wait_on_page_writeback(page);
2583
2584                 if (PageWriteback(page) ||
2585                                 !clear_page_dirty_for_io(page)) {
2586                         unlock_page(page);
2587                         break;
2588                 }
2589
2590                 /*
2591                  * This actually clears the dirty bit in the radix tree.
2592                  * See cifs_writepage() for more commentary.
2593                  */
2594                 set_page_writeback(page);
2595                 if (page_offset(page) >= i_size_read(mapping->host)) {
2596                         *done = true;
2597                         unlock_page(page);
2598                         end_page_writeback(page);
2599                         break;
2600                 }
2601
2602                 wdata->pages[i] = page;
2603                 *next = page->index + 1;
2604                 ++nr_pages;
2605         }
2606
2607         /* reset index to refind any pages skipped */
2608         if (nr_pages == 0)
2609                 *index = wdata->pages[0]->index + 1;
2610
2611         /* put any pages we aren't going to use */
2612         for (i = nr_pages; i < found_pages; i++) {
2613                 put_page(wdata->pages[i]);
2614                 wdata->pages[i] = NULL;
2615         }
2616
2617         return nr_pages;
2618 }
2619
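/*
 * Fill in the remaining wdata fields and hand the batch to the transport.
 * tailsz is derived from i_size so only the valid bytes of the final page
 * are sent; a stale handle is reported as -EAGAIN so the caller retries.
 */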
2620 static int
2621 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2622                  struct address_space *mapping, struct writeback_control *wbc)
2623 {
2624         int rc;
2625
2626         wdata->sync_mode = wbc->sync_mode;
2627         wdata->nr_pages = nr_pages;
2628         wdata->offset = page_offset(wdata->pages[0]);
2629         wdata->pagesz = PAGE_SIZE;
2630         wdata->tailsz = min(i_size_read(mapping->host) -
2631                         page_offset(wdata->pages[nr_pages - 1]),
2632                         (loff_t)PAGE_SIZE);
2633         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2634         wdata->pid = wdata->cfile->pid;
2635
2636         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2637         if (rc)
2638                 return rc;
2639
2640         if (wdata->cfile->invalidHandle)
2641                 rc = -EAGAIN;
2642         else
2643                 rc = wdata->server->ops->async_writev(wdata,
2644                                                       cifs_writedata_release);
2645
2646         return rc;
2647 }
2648
2649 static int
2650 cifs_writepage_locked(struct page *page, struct writeback_control *wbc);
2651
2652 static int cifs_write_one_page(struct page *page, struct writeback_control *wbc,
2653                 void *data)
2654 {
2655         struct address_space *mapping = data;
2656         int ret;
2657
2658         ret = cifs_writepage_locked(page, wbc);
2659         unlock_page(page);
2660         mapping_set_error(mapping, ret);
2661         return ret;
2662 }
2663
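/*
 * Writeback strategy: gather runs of consecutive dirty pages into a
 * cifs_writedata sized by the negotiated wsize and the credits granted by
 * the server, then issue them as asynchronous writes.  Mounts whose wsize
 * is smaller than PAGE_SIZE fall back to the one-page-at-a-time
 * write_cache_pages() path instead.
 */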
2664 static int cifs_writepages(struct address_space *mapping,
2665                            struct writeback_control *wbc)
2666 {
2667         struct inode *inode = mapping->host;
2668         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2669         struct TCP_Server_Info *server;
2670         bool done = false, scanned = false, range_whole = false;
2671         pgoff_t end, index;
2672         struct cifs_writedata *wdata;
2673         struct cifsFileInfo *cfile = NULL;
2674         int rc = 0;
2675         int saved_rc = 0;
2676         unsigned int xid;
2677
2678         /*
2679          * If wsize is smaller than the page cache size, default to writing
2680          * one page at a time.
2681          */
2682         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2683                 return write_cache_pages(mapping, wbc, cifs_write_one_page,
2684                                 mapping);
2685
2686         xid = get_xid();
2687         if (wbc->range_cyclic) {
2688                 index = mapping->writeback_index; /* Start from prev offset */
2689                 end = -1;
2690         } else {
2691                 index = wbc->range_start >> PAGE_SHIFT;
2692                 end = wbc->range_end >> PAGE_SHIFT;
2693                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2694                         range_whole = true;
2695                 scanned = true;
2696         }
2697         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2698
2699 retry:
2700         while (!done && index <= end) {
2701                 unsigned int i, nr_pages, found_pages, wsize;
2702                 pgoff_t next = 0, tofind, saved_index = index;
2703                 struct cifs_credits credits_on_stack;
2704                 struct cifs_credits *credits = &credits_on_stack;
2705                 int get_file_rc = 0;
2706
2707                 if (cfile)
2708                         cifsFileInfo_put(cfile);
2709
2710                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2711
2712                 /* in case of an error store it to return later */
2713                 if (rc)
2714                         get_file_rc = rc;
2715
2716                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2717                                                    &wsize, credits);
2718                 if (rc != 0) {
2719                         done = true;
2720                         break;
2721                 }
2722
2723                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2724
2725                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2726                                                   &found_pages);
2727                 if (!wdata) {
2728                         rc = -ENOMEM;
2729                         done = true;
2730                         add_credits_and_wake_if(server, credits, 0);
2731                         break;
2732                 }
2733
2734                 if (found_pages == 0) {
2735                         kref_put(&wdata->refcount, cifs_writedata_release);
2736                         add_credits_and_wake_if(server, credits, 0);
2737                         break;
2738                 }
2739
2740                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2741                                                end, &index, &next, &done);
2742
2743                 /* nothing to write? */
2744                 if (nr_pages == 0) {
2745                         kref_put(&wdata->refcount, cifs_writedata_release);
2746                         add_credits_and_wake_if(server, credits, 0);
2747                         continue;
2748                 }
2749
2750                 wdata->credits = credits_on_stack;
2751                 wdata->cfile = cfile;
2752                 wdata->server = server;
2753                 cfile = NULL;
2754
2755                 if (!wdata->cfile) {
2756                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2757                                  get_file_rc);
2758                         if (is_retryable_error(get_file_rc))
2759                                 rc = get_file_rc;
2760                         else
2761                                 rc = -EBADF;
2762                 } else
2763                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2764
2765                 for (i = 0; i < nr_pages; ++i)
2766                         unlock_page(wdata->pages[i]);
2767
2768                 /* send failure -- clean up the mess */
2769                 if (rc != 0) {
2770                         add_credits_and_wake_if(server, &wdata->credits, 0);
2771                         for (i = 0; i < nr_pages; ++i) {
2772                                 if (is_retryable_error(rc))
2773                                         redirty_page_for_writepage(wbc,
2774                                                            wdata->pages[i]);
2775                                 else
2776                                         SetPageError(wdata->pages[i]);
2777                                 end_page_writeback(wdata->pages[i]);
2778                                 put_page(wdata->pages[i]);
2779                         }
2780                         if (!is_retryable_error(rc))
2781                                 mapping_set_error(mapping, rc);
2782                 }
2783                 kref_put(&wdata->refcount, cifs_writedata_release);
2784
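                     /*
                      * Data-integrity writeback must not skip pages on a
                      * transient failure, so rewind to the start of this
                      * batch and send it again.
                      */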
2785                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2786                         index = saved_index;
2787                         continue;
2788                 }
2789
2790                 /* Return immediately if we received a signal during writing */
2791                 if (is_interrupt_error(rc)) {
2792                         done = true;
2793                         break;
2794                 }
2795
2796                 if (rc != 0 && saved_rc == 0)
2797                         saved_rc = rc;
2798
2799                 wbc->nr_to_write -= nr_pages;
2800                 if (wbc->nr_to_write <= 0)
2801                         done = true;
2802
2803                 index = next;
2804         }
2805
2806         if (!scanned && !done) {
2807                 /*
2808                  * We hit the last page and there is more work to be done: wrap
2809                  * back to the start of the file
2810                  */
2811                 scanned = true;
2812                 index = 0;
2813                 goto retry;
2814         }
2815
2816         if (saved_rc != 0)
2817                 rc = saved_rc;
2818
2819         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2820                 mapping->writeback_index = index;
2821
2822         if (cfile)
2823                 cifsFileInfo_put(cfile);
2824         free_xid(xid);
2825         /* Indication to update ctime and mtime as close is deferred */
2826         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2827         return rc;
2828 }
2829
2830 static int
2831 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2832 {
2833         int rc;
2834         unsigned int xid;
2835
2836         xid = get_xid();
2837 /* BB add check for wbc flags */
2838         get_page(page);
2839         if (!PageUptodate(page))
2840                 cifs_dbg(FYI, "ppw - page not up to date\n");
2841
2842         /*
2843          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2844          *
2845          * A writepage() implementation always needs to do either this,
2846          * or re-dirty the page with "redirty_page_for_writepage()" in
2847          * the case of a failure.
2848          *
2849          * Just unlocking the page would leave the radix tree tag bits
2850          * out of sync with the actual state of the page.
2851          */
2852         set_page_writeback(page);
2853 retry_write:
2854         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2855         if (is_retryable_error(rc)) {
2856                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2857                         goto retry_write;
2858                 redirty_page_for_writepage(wbc, page);
2859         } else if (rc != 0) {
2860                 SetPageError(page);
2861                 mapping_set_error(page->mapping, rc);
2862         } else {
2863                 SetPageUptodate(page);
2864         }
2865         end_page_writeback(page);
2866         put_page(page);
2867         free_xid(xid);
2868         return rc;
2869 }
2870
2871 static int cifs_write_end(struct file *file, struct address_space *mapping,
2872                         loff_t pos, unsigned len, unsigned copied,
2873                         struct page *page, void *fsdata)
2874 {
2875         int rc;
2876         struct inode *inode = mapping->host;
2877         struct cifsFileInfo *cfile = file->private_data;
2878         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2879         __u32 pid;
2880
2881         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2882                 pid = cfile->pid;
2883         else
2884                 pid = current->tgid;
2885
2886         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2887                  page, pos, copied);
2888
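             /*
              * PageChecked was set by cifs_write_begin() when it zeroed the
              * parts of the page that are not being written; once the copy
              * covers all of the requested bytes, the whole page is up to
              * date.
              */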
2889         if (PageChecked(page)) {
2890                 if (copied == len)
2891                         SetPageUptodate(page);
2892                 ClearPageChecked(page);
2893         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2894                 SetPageUptodate(page);
2895
2896         if (!PageUptodate(page)) {
2897                 char *page_data;
2898                 unsigned offset = pos & (PAGE_SIZE - 1);
2899                 unsigned int xid;
2900
2901                 xid = get_xid();
2902                 /* This is probably better than calling partialpage_write
2903                    directly, since here the file handle is already known
2904                    and we might as well leverage it */
2905                 /* BB check if anything else is missing from ppw,
2906                    such as updating the last write time */
2907                 page_data = kmap(page);
2908                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2909                 /* if (rc < 0) should we set writebehind rc? */
2910                 kunmap(page);
2911
2912                 free_xid(xid);
2913         } else {
2914                 rc = copied;
2915                 pos += copied;
2916                 set_page_dirty(page);
2917         }
2918
2919         if (rc > 0) {
2920                 spin_lock(&inode->i_lock);
2921                 if (pos > inode->i_size) {
2922                         i_size_write(inode, pos);
2923                         inode->i_blocks = (512 - 1 + pos) >> 9;
2924                 }
2925                 spin_unlock(&inode->i_lock);
2926         }
2927
2928         unlock_page(page);
2929         put_page(page);
2930         /* Indication to update ctime and mtime as close is deferred */
2931         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2932
2933         return rc;
2934 }
2935
2936 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2937                       int datasync)
2938 {
2939         unsigned int xid;
2940         int rc = 0;
2941         struct cifs_tcon *tcon;
2942         struct TCP_Server_Info *server;
2943         struct cifsFileInfo *smbfile = file->private_data;
2944         struct inode *inode = file_inode(file);
2945         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2946
2947         rc = file_write_and_wait_range(file, start, end);
2948         if (rc) {
2949                 trace_cifs_fsync_err(inode->i_ino, rc);
2950                 return rc;
2951         }
2952
2953         xid = get_xid();
2954
2955         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2956                  file, datasync);
2957
2958         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2959                 rc = cifs_zap_mapping(inode);
2960                 if (rc) {
2961                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2962                         rc = 0; /* don't care about it in fsync */
2963                 }
2964         }
2965
2966         tcon = tlink_tcon(smbfile->tlink);
2967         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2968                 server = tcon->ses->server;
2969                 if (server->ops->flush == NULL) {
2970                         rc = -ENOSYS;
2971                         goto strict_fsync_exit;
2972                 }
2973
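                     /*
                      * If this handle was not opened for writing, flush via
                      * any writable handle for the inode instead.
                      */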
2974                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2975                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2976                         if (smbfile) {
2977                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2978                                 cifsFileInfo_put(smbfile);
2979                         } else
2980                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2981                 } else
2982                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2983         }
2984
2985 strict_fsync_exit:
2986         free_xid(xid);
2987         return rc;
2988 }
2989
2990 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2991 {
2992         unsigned int xid;
2993         int rc = 0;
2994         struct cifs_tcon *tcon;
2995         struct TCP_Server_Info *server;
2996         struct cifsFileInfo *smbfile = file->private_data;
2997         struct inode *inode = file_inode(file);
2998         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2999
3000         rc = file_write_and_wait_range(file, start, end);
3001         if (rc) {
3002                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3003                 return rc;
3004         }
3005
3006         xid = get_xid();
3007
3008         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3009                  file, datasync);
3010
3011         tcon = tlink_tcon(smbfile->tlink);
3012         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3013                 server = tcon->ses->server;
3014                 if (server->ops->flush == NULL) {
3015                         rc = -ENOSYS;
3016                         goto fsync_exit;
3017                 }
3018
3019                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3020                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3021                         if (smbfile) {
3022                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3023                                 cifsFileInfo_put(smbfile);
3024                         } else
3025                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3026                 } else
3027                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3028         }
3029
3030 fsync_exit:
3031         free_xid(xid);
3032         return rc;
3033 }
3034
3035 /*
3036  * As the file closes, flush all cached write data for this inode,
3037  * checking for write-behind errors.
3038  */
3039 int cifs_flush(struct file *file, fl_owner_t id)
3040 {
3041         struct inode *inode = file_inode(file);
3042         int rc = 0;
3043
3044         if (file->f_mode & FMODE_WRITE)
3045                 rc = filemap_write_and_wait(inode->i_mapping);
3046
3047         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3048         if (rc) {
3049                 /* get more nuanced writeback errors */
3050                 rc = filemap_check_wb_err(file->f_mapping, 0);
3051                 trace_cifs_flush_err(inode->i_ino, rc);
3052         }
3053         return rc;
3054 }
3055
3056 static int
3057 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3058 {
3059         int rc = 0;
3060         unsigned long i;
3061
3062         for (i = 0; i < num_pages; i++) {
3063                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3064                 if (!pages[i]) {
3065                         /*
3066                          * save number of pages we have already allocated and
3067                          * return with ENOMEM error
3068                          */
3069                         num_pages = i;
3070                         rc = -ENOMEM;
3071                         break;
3072                 }
3073         }
3074
3075         if (rc) {
3076                 for (i = 0; i < num_pages; i++)
3077                         put_page(pages[i]);
3078         }
3079         return rc;
3080 }
3081
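     /*
      * For example, with wsize = 65536 and len = 100000, *cur_len is capped
      * at 65536 and, assuming 4 KiB pages, the return value is
      * DIV_ROUND_UP(65536, 4096) = 16; the remaining 34464 bytes are sent
      * in a later chunk.
      */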
3082 static inline
3083 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3084 {
3085         size_t num_pages;
3086         size_t clen;
3087
3088         clen = min_t(const size_t, len, wsize);
3089         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3090
3091         if (cur_len)
3092                 *cur_len = clen;
3093
3094         return num_pages;
3095 }
3096
3097 static void
3098 cifs_uncached_writedata_release(struct kref *refcount)
3099 {
3100         int i;
3101         struct cifs_writedata *wdata = container_of(refcount,
3102                                         struct cifs_writedata, refcount);
3103
3104         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3105         for (i = 0; i < wdata->nr_pages; i++)
3106                 put_page(wdata->pages[i]);
3107         cifs_writedata_release(refcount);
3108 }
3109
3110 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3111
3112 static void
3113 cifs_uncached_writev_complete(struct work_struct *work)
3114 {
3115         struct cifs_writedata *wdata = container_of(work,
3116                                         struct cifs_writedata, work);
3117         struct inode *inode = d_inode(wdata->cfile->dentry);
3118         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3119
3120         spin_lock(&inode->i_lock);
3121         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3122         if (cifsi->server_eof > inode->i_size)
3123                 i_size_write(inode, cifsi->server_eof);
3124         spin_unlock(&inode->i_lock);
3125
3126         complete(&wdata->done);
3127         collect_uncached_write_data(wdata->ctx);
3128         /* the call below may free the last reference to the aio ctx */
3129         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3130 }
3131
3132 static int
3133 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3134                       size_t *len, unsigned long *num_pages)
3135 {
3136         size_t save_len, copied, bytes, cur_len = *len;
3137         unsigned long i, nr_pages = *num_pages;
3138
3139         save_len = cur_len;
3140         for (i = 0; i < nr_pages; i++) {
3141                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3142                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3143                 cur_len -= copied;
3144                 /*
3145                  * If we didn't copy as much as we expected, then that
3146                  * may mean we trod into an unmapped area. Stop copying
3147                  * at that point. On the next pass through the big
3148                  * loop, we'll likely end up getting a zero-length
3149                  * write and bailing out of it.
3150                  */
3151                 if (copied < bytes)
3152                         break;
3153         }
3154         cur_len = save_len - cur_len;
3155         *len = cur_len;
3156
3157         /*
3158          * If we have no data to send, then that probably means that
3159          * the copy above failed altogether. That's most likely because
3160          * the address in the iovec was bogus. Return -EFAULT and let
3161          * the caller free anything we allocated and bail out.
3162          */
3163         if (!cur_len)
3164                 return -EFAULT;
3165
3166         /*
3167          * i + 1 now represents the number of pages we actually used in
3168          * the copy phase above.
3169          */
3170         *num_pages = i + 1;
3171         return 0;
3172 }
3173
3174 static int
3175 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3176         struct cifs_aio_ctx *ctx)
3177 {
3178         unsigned int wsize;
3179         struct cifs_credits credits;
3180         int rc;
3181         struct TCP_Server_Info *server = wdata->server;
3182
3183         do {
3184                 if (wdata->cfile->invalidHandle) {
3185                         rc = cifs_reopen_file(wdata->cfile, false);
3186                         if (rc == -EAGAIN)
3187                                 continue;
3188                         else if (rc)
3189                                 break;
3190                 }
3191
3193                 /*
3194                  * Wait for credits to resend this wdata.
3195                  * Note: we are attempting to resend the whole wdata,
3196                  * not in segments.
3197                  */
3198                 do {
3199                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3200                                                 &wsize, &credits);
3201                         if (rc)
3202                                 goto fail;
3203
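                             /*
                              * Partial credits are of no use here since the
                              * wdata is resent whole: return them and back
                              * off before asking again.
                              */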
3204                         if (wsize < wdata->bytes) {
3205                                 add_credits_and_wake_if(server, &credits, 0);
3206                                 msleep(1000);
3207                         }
3208                 } while (wsize < wdata->bytes);
3209                 wdata->credits = credits;
3210
3211                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3212
3213                 if (!rc) {
3214                         if (wdata->cfile->invalidHandle)
3215                                 rc = -EAGAIN;
3216                         else {
3217 #ifdef CONFIG_CIFS_SMB_DIRECT
3218                                 if (wdata->mr) {
3219                                         wdata->mr->need_invalidate = true;
3220                                         smbd_deregister_mr(wdata->mr);
3221                                         wdata->mr = NULL;
3222                                 }
3223 #endif
3224                                 rc = server->ops->async_writev(wdata,
3225                                         cifs_uncached_writedata_release);
3226                         }
3227                 }
3228
3229                 /* If the write was successfully sent, we are done */
3230                 if (!rc) {
3231                         list_add_tail(&wdata->list, wdata_list);
3232                         return 0;
3233                 }
3234
3235                 /* Roll back credits and retry if needed */
3236                 add_credits_and_wake_if(server, &wdata->credits, 0);
3237         } while (rc == -EAGAIN);
3238
3239 fail:
3240         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3241         return rc;
3242 }
3243
3244 static int
3245 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3246                      struct cifsFileInfo *open_file,
3247                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3248                      struct cifs_aio_ctx *ctx)
3249 {
3250         int rc = 0;
3251         size_t cur_len;
3252         unsigned long nr_pages, num_pages, i;
3253         struct cifs_writedata *wdata;
3254         struct iov_iter saved_from = *from;
3255         loff_t saved_offset = offset;
3256         pid_t pid;
3257         struct TCP_Server_Info *server;
3258         struct page **pagevec;
3259         size_t start;
3260         unsigned int xid;
3261
3262         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3263                 pid = open_file->pid;
3264         else
3265                 pid = current->tgid;
3266
3267         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3268         xid = get_xid();
3269
3270         do {
3271                 unsigned int wsize;
3272                 struct cifs_credits credits_on_stack;
3273                 struct cifs_credits *credits = &credits_on_stack;
3274
3275                 if (open_file->invalidHandle) {
3276                         rc = cifs_reopen_file(open_file, false);
3277                         if (rc == -EAGAIN)
3278                                 continue;
3279                         else if (rc)
3280                                 break;
3281                 }
3282
3283                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3284                                                    &wsize, credits);
3285                 if (rc)
3286                         break;
3287
3288                 cur_len = min_t(const size_t, len, wsize);
3289
3290                 if (ctx->direct_io) {
3291                         ssize_t result;
3292
3293                         result = iov_iter_get_pages_alloc2(
3294                                 from, &pagevec, cur_len, &start);
3295                         if (result < 0) {
3296                                 cifs_dbg(VFS,
3297                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3298                                          result, iov_iter_type(from),
3299                                          from->iov_offset, from->count);
3300                                 dump_stack();
3301
3302                                 rc = result;
3303                                 add_credits_and_wake_if(server, credits, 0);
3304                                 break;
3305                         }
3306                         cur_len = (size_t)result;
3307
3308                         nr_pages =
3309                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3310
3311                         wdata = cifs_writedata_direct_alloc(pagevec,
3312                                              cifs_uncached_writev_complete);
3313                         if (!wdata) {
3314                                 rc = -ENOMEM;
3315                                 for (i = 0; i < nr_pages; i++)
3316                                         put_page(pagevec[i]);
3317                                 kvfree(pagevec);
3318                                 add_credits_and_wake_if(server, credits, 0);
3319                                 break;
3320                         }
3321
3322
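                             /*
                              * The user buffer need not be page aligned:
                              * start is the offset into the first page and
                              * the tail page holds the remainder. For
                              * example, with 4 KiB pages, start = 100 and
                              * cur_len = 10000 span three pages, leaving
                              * tailsz = 10000 - 3996 - 4096 = 1908.
                              */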
3323                         wdata->page_offset = start;
3324                         wdata->tailsz =
3325                                 nr_pages > 1 ?
3326                                         cur_len - (PAGE_SIZE - start) -
3327                                         (nr_pages - 2) * PAGE_SIZE :
3328                                         cur_len;
3329                 } else {
3330                         nr_pages = get_numpages(wsize, len, &cur_len);
3331                         wdata = cifs_writedata_alloc(nr_pages,
3332                                              cifs_uncached_writev_complete);
3333                         if (!wdata) {
3334                                 rc = -ENOMEM;
3335                                 add_credits_and_wake_if(server, credits, 0);
3336                                 break;
3337                         }
3338
3339                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3340                         if (rc) {
3341                                 kvfree(wdata->pages);
3342                                 kfree(wdata);
3343                                 add_credits_and_wake_if(server, credits, 0);
3344                                 break;
3345                         }
3346
3347                         num_pages = nr_pages;
3348                         rc = wdata_fill_from_iovec(
3349                                 wdata, from, &cur_len, &num_pages);
3350                         if (rc) {
3351                                 for (i = 0; i < nr_pages; i++)
3352                                         put_page(wdata->pages[i]);
3353                                 kvfree(wdata->pages);
3354                                 kfree(wdata);
3355                                 add_credits_and_wake_if(server, credits, 0);
3356                                 break;
3357                         }
3358
3359                         /*
3360                          * Bring nr_pages down to the number of pages we
3361                          * actually used, and free any pages that we didn't use.
3362                          */
3363                         for ( ; nr_pages > num_pages; nr_pages--)
3364                                 put_page(wdata->pages[nr_pages - 1]);
3365
3366                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3367                 }
3368
3369                 wdata->sync_mode = WB_SYNC_ALL;
3370                 wdata->nr_pages = nr_pages;
3371                 wdata->offset = (__u64)offset;
3372                 wdata->cfile = cifsFileInfo_get(open_file);
3373                 wdata->server = server;
3374                 wdata->pid = pid;
3375                 wdata->bytes = cur_len;
3376                 wdata->pagesz = PAGE_SIZE;
3377                 wdata->credits = credits_on_stack;
3378                 wdata->ctx = ctx;
3379                 kref_get(&ctx->refcount);
3380
3381                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3382
3383                 if (!rc) {
3384                         if (wdata->cfile->invalidHandle)
3385                                 rc = -EAGAIN;
3386                         else
3387                                 rc = server->ops->async_writev(wdata,
3388                                         cifs_uncached_writedata_release);
3389                 }
3390
3391                 if (rc) {
3392                         add_credits_and_wake_if(server, &wdata->credits, 0);
3393                         kref_put(&wdata->refcount,
3394                                  cifs_uncached_writedata_release);
3395                         if (rc == -EAGAIN) {
3396                                 *from = saved_from;
3397                                 iov_iter_advance(from, offset - saved_offset);
3398                                 continue;
3399                         }
3400                         break;
3401                 }
3402
3403                 list_add_tail(&wdata->list, wdata_list);
3404                 offset += cur_len;
3405                 len -= cur_len;
3406         } while (len > 0);
3407
3408         free_xid(xid);
3409         return rc;
3410 }
3411
3412 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3413 {
3414         struct cifs_writedata *wdata, *tmp;
3415         struct cifs_tcon *tcon;
3416         struct cifs_sb_info *cifs_sb;
3417         struct dentry *dentry = ctx->cfile->dentry;
3418         ssize_t rc;
3419
3420         tcon = tlink_tcon(ctx->cfile->tlink);
3421         cifs_sb = CIFS_SB(dentry->d_sb);
3422
3423         mutex_lock(&ctx->aio_mutex);
3424
3425         if (list_empty(&ctx->list)) {
3426                 mutex_unlock(&ctx->aio_mutex);
3427                 return;
3428         }
3429
3430         rc = ctx->rc;
3431         /*
3432          * Wait for and collect replies for any successful sends in order of
3433          * increasing offset. Once an error is hit, then return without waiting
3434          * for any more replies.
3435          */
3436 restart_loop:
3437         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3438                 if (!rc) {
3439                         if (!try_wait_for_completion(&wdata->done)) {
3440                                 mutex_unlock(&ctx->aio_mutex);
3441                                 return;
3442                         }
3443
3444                         if (wdata->result)
3445                                 rc = wdata->result;
3446                         else
3447                                 ctx->total_len += wdata->bytes;
3448
3449                         /* resend call if it's a retryable error */
3450                         if (rc == -EAGAIN) {
3451                                 struct list_head tmp_list;
3452                                 struct iov_iter tmp_from = ctx->iter;
3453
3454                                 INIT_LIST_HEAD(&tmp_list);
3455                                 list_del_init(&wdata->list);
3456
3457                                 if (ctx->direct_io)
3458                                         rc = cifs_resend_wdata(
3459                                                 wdata, &tmp_list, ctx);
3460                                 else {
3461                                         iov_iter_advance(&tmp_from,
3462                                                  wdata->offset - ctx->pos);
3463
3464                                         rc = cifs_write_from_iter(wdata->offset,
3465                                                 wdata->bytes, &tmp_from,
3466                                                 ctx->cfile, cifs_sb, &tmp_list,
3467                                                 ctx);
3468
3469                                         kref_put(&wdata->refcount,
3470                                                 cifs_uncached_writedata_release);
3471                                 }
3472
3473                                 list_splice(&tmp_list, &ctx->list);
3474                                 goto restart_loop;
3475                         }
3476                 }
3477                 list_del_init(&wdata->list);
3478                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3479         }
3480
3481         cifs_stats_bytes_written(tcon, ctx->total_len);
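             /*
              * These writes bypassed the page cache, so any cached pages for
              * this inode may now be stale; mark the mapping invalid so it
              * is zapped on the next revalidation.
              */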
3482         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3483
3484         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3485
3486         mutex_unlock(&ctx->aio_mutex);
3487
3488         if (ctx->iocb && ctx->iocb->ki_complete)
3489                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3490         else
3491                 complete(&ctx->done);
3492 }
3493
3494 static ssize_t __cifs_writev(
3495         struct kiocb *iocb, struct iov_iter *from, bool direct)
3496 {
3497         struct file *file = iocb->ki_filp;
3498         ssize_t total_written = 0;
3499         struct cifsFileInfo *cfile;
3500         struct cifs_tcon *tcon;
3501         struct cifs_sb_info *cifs_sb;
3502         struct cifs_aio_ctx *ctx;
3503         struct iov_iter saved_from = *from;
3504         size_t len = iov_iter_count(from);
3505         int rc;
3506
3507         /*
3508          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3509          * In this case, fall back to the non-direct write function.
3510          * This could be improved by getting pages directly from ITER_KVEC.
3511          */
3512         if (direct && iov_iter_is_kvec(from)) {
3513                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3514                 direct = false;
3515         }
3516
3517         rc = generic_write_checks(iocb, from);
3518         if (rc <= 0)
3519                 return rc;
3520
3521         cifs_sb = CIFS_FILE_SB(file);
3522         cfile = file->private_data;
3523         tcon = tlink_tcon(cfile->tlink);
3524
3525         if (!tcon->ses->server->ops->async_writev)
3526                 return -ENOSYS;
3527
3528         ctx = cifs_aio_ctx_alloc();
3529         if (!ctx)
3530                 return -ENOMEM;
3531
3532         ctx->cfile = cifsFileInfo_get(cfile);
3533
3534         if (!is_sync_kiocb(iocb))
3535                 ctx->iocb = iocb;
3536
3537         ctx->pos = iocb->ki_pos;
3538
3539         if (direct) {
3540                 ctx->direct_io = true;
3541                 ctx->iter = *from;
3542                 ctx->len = len;
3543         } else {
3544                 rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE);
3545                 if (rc) {
3546                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3547                         return rc;
3548                 }
3549         }
3550
3551         /* grab a lock here because the write response handlers can access ctx */
3552         mutex_lock(&ctx->aio_mutex);
3553
3554         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3555                                   cfile, cifs_sb, &ctx->list, ctx);
3556
3557         /*
3558          * If at least one write was successfully sent, then discard any rc
3559          * value from the later writes. If the remaining writes succeed,
3560          * we'll end up returning whatever was written. If one fails, we'll
3561          * get a new rc value from that.
3562          */
3563         if (!list_empty(&ctx->list))
3564                 rc = 0;
3565
3566         mutex_unlock(&ctx->aio_mutex);
3567
3568         if (rc) {
3569                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3570                 return rc;
3571         }
3572
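             /*
              * For an async kiocb, completion is signalled through
              * ctx->iocb->ki_complete() by collect_uncached_write_data()
              * once all in-flight writes have finished; drop our reference
              * and return.
              */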
3573         if (!is_sync_kiocb(iocb)) {
3574                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3575                 return -EIOCBQUEUED;
3576         }
3577
3578         rc = wait_for_completion_killable(&ctx->done);
3579         if (rc) {
3580                 mutex_lock(&ctx->aio_mutex);
3581                 ctx->rc = rc = -EINTR;
3582                 total_written = ctx->total_len;
3583                 mutex_unlock(&ctx->aio_mutex);
3584         } else {
3585                 rc = ctx->rc;
3586                 total_written = ctx->total_len;
3587         }
3588
3589         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3590
3591         if (unlikely(!total_written))
3592                 return rc;
3593
3594         iocb->ki_pos += total_written;
3595         return total_written;
3596 }
3597
3598 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3599 {
3600         struct file *file = iocb->ki_filp;
3601
3602         cifs_revalidate_mapping(file->f_inode);
3603         return __cifs_writev(iocb, from, true);
3604 }
3605
3606 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3607 {
3608         return __cifs_writev(iocb, from, false);
3609 }
3610
3611 static ssize_t
3612 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3613 {
3614         struct file *file = iocb->ki_filp;
3615         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3616         struct inode *inode = file->f_mapping->host;
3617         struct cifsInodeInfo *cinode = CIFS_I(inode);
3618         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3619         ssize_t rc;
3620
3621         inode_lock(inode);
3622         /*
3623          * We need to hold the sem to be sure nobody modifies lock list
3624          * We need to hold the sem to be sure nobody modifies the lock list
3625          */
3626         down_read(&cinode->lock_sem);
3627
3628         rc = generic_write_checks(iocb, from);
3629         if (rc <= 0)
3630                 goto out;
3631
3632         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3633                                      server->vals->exclusive_lock_type, 0,
3634                                      NULL, CIFS_WRITE_OP))
3635                 rc = __generic_file_write_iter(iocb, from);
3636         else
3637                 rc = -EACCES;
3638 out:
3639         up_read(&cinode->lock_sem);
3640         inode_unlock(inode);
3641
3642         if (rc > 0)
3643                 rc = generic_write_sync(iocb, rc);
3644         return rc;
3645 }
3646
3647 ssize_t
3648 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3649 {
3650         struct inode *inode = file_inode(iocb->ki_filp);
3651         struct cifsInodeInfo *cinode = CIFS_I(inode);
3652         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3653         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3654                                                 iocb->ki_filp->private_data;
3655         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3656         ssize_t written;
3657
3658         written = cifs_get_writer(cinode);
3659         if (written)
3660                 return written;
3661
3662         if (CIFS_CACHE_WRITE(cinode)) {
3663                 if (cap_unix(tcon->ses) &&
3664                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3665                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3666                         written = generic_file_write_iter(iocb, from);
3667                         goto out;
3668                 }
3669                 written = cifs_writev(iocb, from);
3670                 goto out;
3671         }
3672         /*
3673          * For non-oplocked files in strict cache mode we need to write the data
3674          * to the server exactly from the pos to pos+len-1 rather than flush all
3675  * affected pages, because that may cause an error with mandatory locks
3676  * on these pages but not on the region from pos to pos+len-1.
3677          */
3678         written = cifs_user_writev(iocb, from);
3679         if (CIFS_CACHE_READ(cinode)) {
3680                 /*
3681                  * We have read level caching and we have just sent a write
3682                  * request to the server thus making data in the cache stale.
3683                  * Zap the cache and set oplock/lease level to NONE to avoid
3684                  * reading stale data from the cache. All subsequent read
3685                  * operations will read new data from the server.
3686                  */
3687                 cifs_zap_mapping(inode);
3688                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3689                          inode);
3690                 cinode->oplock = 0;
3691         }
3692 out:
3693         cifs_put_writer(cinode);
3694         return written;
3695 }
3696
3697 static struct cifs_readdata *
3698 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3699 {
3700         struct cifs_readdata *rdata;
3701
3702         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3703         if (rdata != NULL) {
3704                 rdata->pages = pages;
3705                 kref_init(&rdata->refcount);
3706                 INIT_LIST_HEAD(&rdata->list);
3707                 init_completion(&rdata->done);
3708                 INIT_WORK(&rdata->work, complete);
3709         }
3710
3711         return rdata;
3712 }
3713
3714 static struct cifs_readdata *
3715 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3716 {
3717         struct page **pages =
3718                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3719         struct cifs_readdata *ret = NULL;
3720
3721         if (pages) {
3722                 ret = cifs_readdata_direct_alloc(pages, complete);
3723                 if (!ret)
3724                         kfree(pages);
3725         }
3726
3727         return ret;
3728 }
3729
3730 void
3731 cifs_readdata_release(struct kref *refcount)
3732 {
3733         struct cifs_readdata *rdata = container_of(refcount,
3734                                         struct cifs_readdata, refcount);
3735 #ifdef CONFIG_CIFS_SMB_DIRECT
3736         if (rdata->mr) {
3737                 smbd_deregister_mr(rdata->mr);
3738                 rdata->mr = NULL;
3739         }
3740 #endif
3741         if (rdata->cfile)
3742                 cifsFileInfo_put(rdata->cfile);
3743
3744         kvfree(rdata->pages);
3745         kfree(rdata);
3746 }
3747
3748 static int
3749 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3750 {
3751         int rc = 0;
3752         struct page *page;
3753         unsigned int i;
3754
3755         for (i = 0; i < nr_pages; i++) {
3756                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3757                 if (!page) {
3758                         rc = -ENOMEM;
3759                         break;
3760                 }
3761                 rdata->pages[i] = page;
3762         }
3763
3764         if (rc) {
3765                 unsigned int nr_page_failed = i;
3766
3767                 for (i = 0; i < nr_page_failed; i++) {
3768                         put_page(rdata->pages[i]);
3769                         rdata->pages[i] = NULL;
3770                 }
3771         }
3772         return rc;
3773 }
3774
3775 static void
3776 cifs_uncached_readdata_release(struct kref *refcount)
3777 {
3778         struct cifs_readdata *rdata = container_of(refcount,
3779                                         struct cifs_readdata, refcount);
3780         unsigned int i;
3781
3782         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3783         for (i = 0; i < rdata->nr_pages; i++) {
3784                 put_page(rdata->pages[i]);
3785         }
3786         cifs_readdata_release(refcount);
3787 }
3788
3789 /**
3790  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3791  * @rdata:      the readdata response with list of pages holding data
3792  * @iter:       destination for our data
3793  *
3794  * This function copies data from a list of pages in a readdata response into
3795  * an array of iovecs. It will first calculate where the data should go
3796  * based on the info in the readdata and then copy the data into that spot.
3797  */
3798 static int
3799 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3800 {
3801         size_t remaining = rdata->got_bytes;
3802         unsigned int i;
3803
3804         for (i = 0; i < rdata->nr_pages; i++) {
3805                 struct page *page = rdata->pages[i];
3806                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3807                 size_t written;
3808
3809                 if (unlikely(iov_iter_is_pipe(iter))) {
3810                         void *addr = kmap_atomic(page);
3811
3812                         written = copy_to_iter(addr, copy, iter);
3813                         kunmap_atomic(addr);
3814                 } else
3815                         written = copy_page_to_iter(page, 0, copy, iter);
3816                 remaining -= written;
3817                 if (written < copy && iov_iter_count(iter) > 0)
3818                         break;
3819         }
3820         return remaining ? -EFAULT : 0;
3821 }
3822
3823 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3824
3825 static void
3826 cifs_uncached_readv_complete(struct work_struct *work)
3827 {
3828         struct cifs_readdata *rdata = container_of(work,
3829                                                 struct cifs_readdata, work);
3830
3831         complete(&rdata->done);
3832         collect_uncached_read_data(rdata->ctx);
3833         /* the call below may free the last reference to the aio ctx */
3834         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3835 }
3836
3837 static int
3838 uncached_fill_pages(struct TCP_Server_Info *server,
3839                     struct cifs_readdata *rdata, struct iov_iter *iter,
3840                     unsigned int len)
3841 {
3842         int result = 0;
3843         unsigned int i;
3844         unsigned int nr_pages = rdata->nr_pages;
3845         unsigned int page_offset = rdata->page_offset;
3846
3847         rdata->got_bytes = 0;
3848         rdata->tailsz = PAGE_SIZE;
3849         for (i = 0; i < nr_pages; i++) {
3850                 struct page *page = rdata->pages[i];
3851                 size_t n;
3852                 unsigned int segment_size = rdata->pagesz;
3853
3854                 if (i == 0)
3855                         segment_size -= page_offset;
3856                 else
3857                         page_offset = 0;
3858
3860                 if (len <= 0) {
3861                         /* no need to hold page hostage */
3862                         rdata->pages[i] = NULL;
3863                         rdata->nr_pages--;
3864                         put_page(page);
3865                         continue;
3866                 }
3867
3868                 n = len;
3869                 if (len >= segment_size)
3870                         /* enough data to fill the page */
3871                         n = segment_size;
3872                 else
3873                         rdata->tailsz = len;
3874                 len -= n;
3875
3876                 if (iter)
3877                         result = copy_page_from_iter(
3878                                         page, page_offset, n, iter);
3879 #ifdef CONFIG_CIFS_SMB_DIRECT
3880                 else if (rdata->mr)
3881                         result = n;
3882 #endif
3883                 else
3884                         result = cifs_read_page_from_socket(
3885                                         server, page, page_offset, n);
3886                 if (result < 0)
3887                         break;
3888
3889                 rdata->got_bytes += result;
3890         }
3891
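             /*
              * Report the bytes received so far, unless nothing arrived or
              * the connection was aborted mid-transfer, in which case the
              * error is propagated to the caller.
              */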
3892         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3893                                                 rdata->got_bytes : result;
3894 }
3895
3896 static int
3897 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3898                               struct cifs_readdata *rdata, unsigned int len)
3899 {
3900         return uncached_fill_pages(server, rdata, NULL, len);
3901 }
3902
3903 static int
3904 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3905                               struct cifs_readdata *rdata,
3906                               struct iov_iter *iter)
3907 {
3908         return uncached_fill_pages(server, rdata, iter, iter->count);
3909 }
3910
3911 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3912                         struct list_head *rdata_list,
3913                         struct cifs_aio_ctx *ctx)
3914 {
3915         unsigned int rsize;
3916         struct cifs_credits credits;
3917         int rc;
3918         struct TCP_Server_Info *server;
3919
3920         /* XXX: should we pick a new channel here? */
3921         server = rdata->server;
3922
3923         do {
3924                 if (rdata->cfile->invalidHandle) {
3925                         rc = cifs_reopen_file(rdata->cfile, true);
3926                         if (rc == -EAGAIN)
3927                                 continue;
3928                         else if (rc)
3929                                 break;
3930                 }
3931
3932                 /*
3933                  * Wait for credits to resend this rdata.
3934                  * Note: we are attempting to resend the whole rdata,
3935                  * not in segments.
3936                  */
3937                 do {
3938                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3939                                                 &rsize, &credits);
3940
3941                         if (rc)
3942                                 goto fail;
3943
3944                         if (rsize < rdata->bytes) {
3945                                 add_credits_and_wake_if(server, &credits, 0);
3946                                 msleep(1000);
3947                         }
3948                 } while (rsize < rdata->bytes);
3949                 rdata->credits = credits;
3950
3951                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3952                 if (!rc) {
3953                         if (rdata->cfile->invalidHandle)
3954                                 rc = -EAGAIN;
3955                         else {
3956 #ifdef CONFIG_CIFS_SMB_DIRECT
3957                                 if (rdata->mr) {
3958                                         rdata->mr->need_invalidate = true;
3959                                         smbd_deregister_mr(rdata->mr);
3960                                         rdata->mr = NULL;
3961                                 }
3962 #endif
3963                                 rc = server->ops->async_readv(rdata);
3964                         }
3965                 }
3966
3967                 /* If the read was successfully sent, we are done */
3968                 if (!rc) {
3969                         /* Add to aio pending list */
3970                         list_add_tail(&rdata->list, rdata_list);
3971                         return 0;
3972                 }
3973
3974                 /* Roll back credits and retry if needed */
3975                 add_credits_and_wake_if(server, &rdata->credits, 0);
3976         } while (rc == -EAGAIN);
3977
3978 fail:
3979         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3980         return rc;
3981 }
3982
3983 static int
3984 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3985                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3986                      struct cifs_aio_ctx *ctx)
3987 {
3988         struct cifs_readdata *rdata;
3989         unsigned int npages, rsize;
3990         struct cifs_credits credits_on_stack;
3991         struct cifs_credits *credits = &credits_on_stack;
3992         size_t cur_len;
3993         int rc;
3994         pid_t pid;
3995         struct TCP_Server_Info *server;
3996         struct page **pagevec;
3997         size_t start;
3998         struct iov_iter direct_iov = ctx->iter;
3999
4000         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4001
4002         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4003                 pid = open_file->pid;
4004         else
4005                 pid = current->tgid;
4006
4007         if (ctx->direct_io)
4008                 iov_iter_advance(&direct_iov, offset - ctx->pos);
4009
4010         do {
4011                 if (open_file->invalidHandle) {
4012                         rc = cifs_reopen_file(open_file, true);
4013                         if (rc == -EAGAIN)
4014                                 continue;
4015                         else if (rc)
4016                                 break;
4017                 }
4018
4019                 if (cifs_sb->ctx->rsize == 0)
4020                         cifs_sb->ctx->rsize =
4021                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4022                                                              cifs_sb->ctx);
4023
4024                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4025                                                    &rsize, credits);
4026                 if (rc)
4027                         break;
4028
4029                 cur_len = min_t(const size_t, len, rsize);
4030
4031                 if (ctx->direct_io) {
4032                         ssize_t result;
4033
4034                         result = iov_iter_get_pages_alloc2(
4035                                         &direct_iov, &pagevec,
4036                                         cur_len, &start);
4037                         if (result < 0) {
4038                                 cifs_dbg(VFS,
4039                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4040                                          result, iov_iter_type(&direct_iov),
4041                                          direct_iov.iov_offset,
4042                                          direct_iov.count);
4043                                 dump_stack();
4044
4045                                 rc = result;
4046                                 add_credits_and_wake_if(server, credits, 0);
4047                                 break;
4048                         }
4049                         cur_len = (size_t)result;
4050
4051                         rdata = cifs_readdata_direct_alloc(
4052                                         pagevec, cifs_uncached_readv_complete);
4053                         if (!rdata) {
4054                                 add_credits_and_wake_if(server, credits, 0);
4055                                 rc = -ENOMEM;
4056                                 break;
4057                         }
4058
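                             /*
                              * As on the write side, the buffer may start
                              * mid-page: e.g. with 4 KiB pages, start = 512
                              * and cur_len = 8192 yield npages = 3 and
                              * tailsz = 8192 - 3584 - 4096 = 512.
                              */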
4059                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4060                         rdata->page_offset = start;
4061                         rdata->tailsz = npages > 1 ?
4062                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4063                                 cur_len;
4064
4065                 } else {
4066
4067                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4068                         /* allocate a readdata struct */
4069                         rdata = cifs_readdata_alloc(npages,
4070                                             cifs_uncached_readv_complete);
4071                         if (!rdata) {
4072                                 add_credits_and_wake_if(server, credits, 0);
4073                                 rc = -ENOMEM;
4074                                 break;
4075                         }
4076
4077                         rc = cifs_read_allocate_pages(rdata, npages);
4078                         if (rc) {
4079                                 kvfree(rdata->pages);
4080                                 kfree(rdata);
4081                                 add_credits_and_wake_if(server, credits, 0);
4082                                 break;
4083                         }
4084
4085                         rdata->tailsz = PAGE_SIZE;
4086                 }
4087
4088                 rdata->server = server;
4089                 rdata->cfile = cifsFileInfo_get(open_file);
4090                 rdata->nr_pages = npages;
4091                 rdata->offset = offset;
4092                 rdata->bytes = cur_len;
4093                 rdata->pid = pid;
4094                 rdata->pagesz = PAGE_SIZE;
4095                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4096                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4097                 rdata->credits = credits_on_stack;
4098                 rdata->ctx = ctx;
4099                 kref_get(&ctx->refcount);
4100
4101                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4102
4103                 if (!rc) {
4104                         if (rdata->cfile->invalidHandle)
4105                                 rc = -EAGAIN;
4106                         else
4107                                 rc = server->ops->async_readv(rdata);
4108                 }
4109
4110                 if (rc) {
4111                         add_credits_and_wake_if(server, &rdata->credits, 0);
4112                         kref_put(&rdata->refcount,
4113                                 cifs_uncached_readdata_release);
4114                         if (rc == -EAGAIN) {
4115                                 iov_iter_revert(&direct_iov, cur_len);
4116                                 continue;
4117                         }
4118                         break;
4119                 }
4120
4121                 list_add_tail(&rdata->list, rdata_list);
4122                 offset += cur_len;
4123                 len -= cur_len;
4124         } while (len > 0);
4125
4126         return rc;
4127 }
4128
4129 static void
4130 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4131 {
4132         struct cifs_readdata *rdata, *tmp;
4133         struct iov_iter *to = &ctx->iter;
4134         struct cifs_sb_info *cifs_sb;
4135         int rc;
4136
4137         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4138
4139         mutex_lock(&ctx->aio_mutex);
4140
4141         if (list_empty(&ctx->list)) {
4142                 mutex_unlock(&ctx->aio_mutex);
4143                 return;
4144         }
4145
4146         rc = ctx->rc;
4147         /* the loop below should proceed in the order of increasing offsets */
4148 again:
4149         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4150                 if (!rc) {
4151                         if (!try_wait_for_completion(&rdata->done)) {
4152                                 mutex_unlock(&ctx->aio_mutex);
4153                                 return;
4154                         }
4155
4156                         if (rdata->result == -EAGAIN) {
4157                                 /* resend call if it's a retryable error */
4158                                 struct list_head tmp_list;
4159                                 unsigned int got_bytes = rdata->got_bytes;
4160
4161                                 list_del_init(&rdata->list);
4162                                 INIT_LIST_HEAD(&tmp_list);
4163
4164                                 /*
4165                                  * We got part of the data and then a
4166                                  * reconnect happened -- fill the buffer
4167                                  * and continue reading.
4168                                  */
4169                                 if (got_bytes && got_bytes < rdata->bytes) {
4170                                         rc = 0;
4171                                         if (!ctx->direct_io)
4172                                                 rc = cifs_readdata_to_iov(rdata, to);
4173                                         if (rc) {
4174                                                 kref_put(&rdata->refcount,
4175                                                         cifs_uncached_readdata_release);
4176                                                 continue;
4177                                         }
4178                                 }
4179
4180                                 if (ctx->direct_io) {
4181                                         /*
4182                                          * Reuse rdata as this is
4183                                          * direct I/O
4184                                          */
4185                                         rc = cifs_resend_rdata(
4186                                                 rdata,
4187                                                 &tmp_list, ctx);
4188                                 } else {
4189                                         rc = cifs_send_async_read(
4190                                                 rdata->offset + got_bytes,
4191                                                 rdata->bytes - got_bytes,
4192                                                 rdata->cfile, cifs_sb,
4193                                                 &tmp_list, ctx);
4194
4195                                         kref_put(&rdata->refcount,
4196                                                 cifs_uncached_readdata_release);
4197                                 }
4198
4199                                 list_splice(&tmp_list, &ctx->list);
4200
4201                                 goto again;
4202                         } else if (rdata->result)
4203                                 rc = rdata->result;
4204                         else if (!ctx->direct_io)
4205                                 rc = cifs_readdata_to_iov(rdata, to);
4206
4207                         /* if there was a short read -- discard anything left */
4208                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4209                                 rc = -ENODATA;
4210
4211                         ctx->total_len += rdata->got_bytes;
4212                 }
4213                 list_del_init(&rdata->list);
4214                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4215         }
4216
4217         if (!ctx->direct_io)
4218                 ctx->total_len = ctx->len - iov_iter_count(to);
4219
4220         /* mask nodata case */
4221         if (rc == -ENODATA)
4222                 rc = 0;
4223
4224         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4225
4226         mutex_unlock(&ctx->aio_mutex);
4227
4228         if (ctx->iocb && ctx->iocb->ki_complete)
4229                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4230         else
4231                 complete(&ctx->done);
4232 }
4233
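/*
 * Common implementation of the uncached and direct read paths: split the
 * request into rsize-sized async reads, then either wait for the result
 * (synchronous iocb) or return -EIOCBQUEUED and let the collection worker
 * complete the iocb.
 */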
4234 static ssize_t __cifs_readv(
4235         struct kiocb *iocb, struct iov_iter *to, bool direct)
4236 {
4237         size_t len;
4238         struct file *file = iocb->ki_filp;
4239         struct cifs_sb_info *cifs_sb;
4240         struct cifsFileInfo *cfile;
4241         struct cifs_tcon *tcon;
4242         ssize_t rc, total_read = 0;
4243         loff_t offset = iocb->ki_pos;
4244         struct cifs_aio_ctx *ctx;
4245
4246         /*
4247          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
4248          * fall back to the data copy read path. This could be improved
4249          * by getting pages directly for ITER_KVEC.
4250          */
4251         if (direct && iov_iter_is_kvec(to)) {
4252                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4253                 direct = false;
4254         }
4255
4256         len = iov_iter_count(to);
4257         if (!len)
4258                 return 0;
4259
4260         cifs_sb = CIFS_FILE_SB(file);
4261         cfile = file->private_data;
4262         tcon = tlink_tcon(cfile->tlink);
4263
4264         if (!tcon->ses->server->ops->async_readv)
4265                 return -ENOSYS;
4266
4267         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4268                 cifs_dbg(FYI, "attempting read on write-only file instance\n");
4269
4270         ctx = cifs_aio_ctx_alloc();
4271         if (!ctx)
4272                 return -ENOMEM;
4273
4274         ctx->cfile = cifsFileInfo_get(cfile);
4275
4276         if (!is_sync_kiocb(iocb))
4277                 ctx->iocb = iocb;
4278
4279         if (user_backed_iter(to))
4280                 ctx->should_dirty = true;
4281
4282         if (direct) {
4283                 ctx->pos = offset;
4284                 ctx->direct_io = true;
4285                 ctx->iter = *to;
4286                 ctx->len = len;
4287         } else {
4288                 rc = setup_aio_ctx_iter(ctx, to, ITER_DEST);
4289                 if (rc) {
4290                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4291                         return rc;
4292                 }
4293                 len = ctx->len;
4294         }
4295
4296         if (direct) {
4297                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4298                                                   offset, offset + len - 1);
4299                 if (rc) {
4300                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4301                         return -EAGAIN;
4302                 }
4303         }
4304
4305         /* grab a lock here because read response handlers can access ctx */
4306         mutex_lock(&ctx->aio_mutex);
4307
4308         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4309
4310         /* if sending at least one read request succeeded, reset rc */
4311         if (!list_empty(&ctx->list))
4312                 rc = 0;
4313
4314         mutex_unlock(&ctx->aio_mutex);
4315
4316         if (rc) {
4317                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4318                 return rc;
4319         }
4320
4321         if (!is_sync_kiocb(iocb)) {
4322                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4323                 return -EIOCBQUEUED;
4324         }
4325
4326         rc = wait_for_completion_killable(&ctx->done);
4327         if (rc) {
4328                 mutex_lock(&ctx->aio_mutex);
4329                 ctx->rc = rc = -EINTR;
4330                 total_read = ctx->total_len;
4331                 mutex_unlock(&ctx->aio_mutex);
4332         } else {
4333                 rc = ctx->rc;
4334                 total_read = ctx->total_len;
4335         }
4336
4337         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4338
4339         if (total_read) {
4340                 iocb->ki_pos += total_read;
4341                 return total_read;
4342         }
4343         return rc;
4344 }
4345
4346 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4347 {
4348         return __cifs_readv(iocb, to, true);
4349 }
4350
4351 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4352 {
4353         return __cifs_readv(iocb, to, false);
4354 }
4355
4356 ssize_t
4357 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4358 {
4359         struct inode *inode = file_inode(iocb->ki_filp);
4360         struct cifsInodeInfo *cinode = CIFS_I(inode);
4361         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4362         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4363                                                 iocb->ki_filp->private_data;
4364         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4365         int rc = -EACCES;
4366
4367         /*
4368          * In strict cache mode we need to read from the server all the time
4369          * if we don't have a level II oplock, because the server can delay
4370          * the mtime change - so we can't decide whether to invalidate the
4371          * inode. Page reads can also fail if there are mandatory locks on
4372          * pages affected by this read but not on the region from pos to
4373          * pos+len-1.
4374          */
4375         if (!CIFS_CACHE_READ(cinode))
4376                 return cifs_user_readv(iocb, to);
4377
4378         if (cap_unix(tcon->ses) &&
4379             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4380             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4381                 return generic_file_read_iter(iocb, to);
4382
4383         /*
4384          * We need to hold the sem to be sure nobody modifies the lock
4385          * list with a brlock that prevents reading.
4386          */
4387         down_read(&cinode->lock_sem);
4388         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4389                                      tcon->ses->server->vals->shared_lock_type,
4390                                      0, NULL, CIFS_READ_OP))
4391                 rc = generic_file_read_iter(iocb, to);
4392         up_read(&cinode->lock_sem);
4393         return rc;
4394 }
4395
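/*
 * Synchronous read helper: issue rsize-sized sync_read calls to the server
 * until the request is satisfied, reopening an invalidated handle and
 * retrying on -EAGAIN as needed.
 */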
4396 static ssize_t
4397 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4398 {
4399         int rc = -EACCES;
4400         unsigned int bytes_read = 0;
4401         unsigned int total_read;
4402         unsigned int current_read_size;
4403         unsigned int rsize;
4404         struct cifs_sb_info *cifs_sb;
4405         struct cifs_tcon *tcon;
4406         struct TCP_Server_Info *server;
4407         unsigned int xid;
4408         char *cur_offset;
4409         struct cifsFileInfo *open_file;
4410         struct cifs_io_parms io_parms = {0};
4411         int buf_type = CIFS_NO_BUFFER;
4412         __u32 pid;
4413
4414         xid = get_xid();
4415         cifs_sb = CIFS_FILE_SB(file);
4416
4417         /* FIXME: set up handlers for larger reads and/or convert to async */
4418         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4419
4420         if (file->private_data == NULL) {
4421                 rc = -EBADF;
4422                 free_xid(xid);
4423                 return rc;
4424         }
4425         open_file = file->private_data;
4426         tcon = tlink_tcon(open_file->tlink);
4427         server = cifs_pick_channel(tcon->ses);
4428
4429         if (!server->ops->sync_read) {
4430                 free_xid(xid);
4431                 return -ENOSYS;
4432         }
4433
4434         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4435                 pid = open_file->pid;
4436         else
4437                 pid = current->tgid;
4438
4439         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4440                 cifs_dbg(FYI, "attempting read on write-only file instance\n");
4441
4442         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4443              total_read += bytes_read, cur_offset += bytes_read) {
4444                 do {
4445                         current_read_size = min_t(uint, read_size - total_read,
4446                                                   rsize);
4447                         /*
4448                          * For Windows ME and 9x we do not want to request
4449                          * more than was negotiated, since the server will
4450                          * refuse the read otherwise.
4451                          */
4452                         if (!(tcon->ses->capabilities &
4453                                 tcon->ses->server->vals->cap_large_files)) {
4454                                 current_read_size = min_t(uint,
4455                                         current_read_size, CIFSMaxBufSize);
4456                         }
4457                         if (open_file->invalidHandle) {
4458                                 rc = cifs_reopen_file(open_file, true);
4459                                 if (rc != 0)
4460                                         break;
4461                         }
4462                         io_parms.pid = pid;
4463                         io_parms.tcon = tcon;
4464                         io_parms.offset = *offset;
4465                         io_parms.length = current_read_size;
4466                         io_parms.server = server;
4467                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4468                                                     &bytes_read, &cur_offset,
4469                                                     &buf_type);
4470                 } while (rc == -EAGAIN);
4471
4472                 if (rc || (bytes_read == 0)) {
4473                         if (total_read) {
4474                                 break;
4475                         } else {
4476                                 free_xid(xid);
4477                                 return rc;
4478                         }
4479                 } else {
4480                         cifs_stats_bytes_read(tcon, total_read);
4481                         *offset += bytes_read;
4482                 }
4483         }
4484         free_xid(xid);
4485         return total_read;
4486 }
4487
4488 /*
4489  * If the page is mmap'ed into a process' page tables, then we need to make
4490  * sure that it doesn't change while being written back.
4491  */
4492 static vm_fault_t
4493 cifs_page_mkwrite(struct vm_fault *vmf)
4494 {
4495         struct page *page = vmf->page;
4496
4497         /* Wait for the page to be written to the cache before we allow it to
4498          * be modified.  We then assume the entire page will need writing back.
4499          */
4500 #ifdef CONFIG_CIFS_FSCACHE
4501         if (PageFsCache(page) &&
4502             wait_on_page_fscache_killable(page) < 0)
4503                 return VM_FAULT_RETRY;
4504 #endif
4505
4506         wait_on_page_writeback(page);
4507
4508         if (lock_page_killable(page) < 0)
4509                 return VM_FAULT_RETRY;
4510         return VM_FAULT_LOCKED;
4511 }
4512
4513 static const struct vm_operations_struct cifs_file_vm_ops = {
4514         .fault = filemap_fault,
4515         .map_pages = filemap_map_pages,
4516         .page_mkwrite = cifs_page_mkwrite,
4517 };
4518
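/*
 * mmap for strict cache mode: zap the page cache first if we do not hold
 * a read oplock/lease and so cannot trust the cached data.
 */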
4519 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4520 {
4521         int xid, rc = 0;
4522         struct inode *inode = file_inode(file);
4523
4524         xid = get_xid();
4525
4526         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4527                 rc = cifs_zap_mapping(inode);
4528         if (!rc)
4529                 rc = generic_file_mmap(file, vma);
4530         if (!rc)
4531                 vma->vm_ops = &cifs_file_vm_ops;
4532
4533         free_xid(xid);
4534         return rc;
4535 }
4536
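/*
 * mmap for the default cache mode: revalidate the file, then use the
 * generic implementation with our page_mkwrite handler.
 */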
4537 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4538 {
4539         int rc, xid;
4540
4541         xid = get_xid();
4542
4543         rc = cifs_revalidate_file(file);
4544         if (rc)
4545                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4546                          rc);
4547         if (!rc)
4548                 rc = generic_file_mmap(file, vma);
4549         if (!rc)
4550                 vma->vm_ops = &cifs_file_vm_ops;
4551
4552         free_xid(xid);
4553         return rc;
4554 }
4555
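/*
 * Completion work for a readahead request: mark each page uptodate (or as
 * errored), hand it to fscache where appropriate, then unlock and release
 * it.
 */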
4556 static void
4557 cifs_readv_complete(struct work_struct *work)
4558 {
4559         unsigned int i, got_bytes;
4560         struct cifs_readdata *rdata = container_of(work,
4561                                                 struct cifs_readdata, work);
4562
4563         got_bytes = rdata->got_bytes;
4564         for (i = 0; i < rdata->nr_pages; i++) {
4565                 struct page *page = rdata->pages[i];
4566
4567                 if (rdata->result == 0 ||
4568                     (rdata->result == -EAGAIN && got_bytes)) {
4569                         flush_dcache_page(page);
4570                         SetPageUptodate(page);
4571                 } else
4572                         SetPageError(page);
4573
4574                 if (rdata->result == 0 ||
4575                     (rdata->result == -EAGAIN && got_bytes))
4576                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4577
4578                 unlock_page(page);
4579
4580                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4581
4582                 put_page(page);
4583                 rdata->pages[i] = NULL;
4584         }
4585         kref_put(&rdata->refcount, cifs_readdata_release);
4586 }
4587
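/*
 * Fill the pages attached to a readahead request, either from the supplied
 * iterator or straight from the socket. Pages beyond the data actually
 * returned are zeroed or released.
 */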
4588 static int
4589 readpages_fill_pages(struct TCP_Server_Info *server,
4590                      struct cifs_readdata *rdata, struct iov_iter *iter,
4591                      unsigned int len)
4592 {
4593         int result = 0;
4594         unsigned int i;
4595         u64 eof;
4596         pgoff_t eof_index;
4597         unsigned int nr_pages = rdata->nr_pages;
4598         unsigned int page_offset = rdata->page_offset;
4599
4600         /* determine the eof that the server (probably) has */
4601         eof = CIFS_I(rdata->mapping->host)->server_eof;
4602         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4603         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4604
4605         rdata->got_bytes = 0;
4606         rdata->tailsz = PAGE_SIZE;
4607         for (i = 0; i < nr_pages; i++) {
4608                 struct page *page = rdata->pages[i];
4609                 unsigned int to_read = rdata->pagesz;
4610                 size_t n;
4611
4612                 if (i == 0)
4613                         to_read -= page_offset;
4614                 else
4615                         page_offset = 0;
4616
4617                 n = to_read;
4618
4619                 if (len >= to_read) {
4620                         len -= to_read;
4621                 } else if (len > 0) {
4622                         /* enough for a partial page; fill it and zero the rest */
4623                         zero_user(page, len + page_offset, to_read - len);
4624                         n = rdata->tailsz = len;
4625                         len = 0;
4626                 } else if (page->index > eof_index) {
4627                         /*
4628                          * The VFS will not try to do readahead past the
4629                          * i_size, but it's possible that we have outstanding
4630                          * writes with gaps in the middle and the i_size hasn't
4631                          * caught up yet. Populate those with zeroed out pages
4632                          * to prevent the VFS from repeatedly attempting to
4633                          * fill them until the writes are flushed.
4634                          */
4635                         zero_user(page, 0, PAGE_SIZE);
4636                         flush_dcache_page(page);
4637                         SetPageUptodate(page);
4638                         unlock_page(page);
4639                         put_page(page);
4640                         rdata->pages[i] = NULL;
4641                         rdata->nr_pages--;
4642                         continue;
4643                 } else {
4644                         /* no need to hold page hostage */
4645                         unlock_page(page);
4646                         put_page(page);
4647                         rdata->pages[i] = NULL;
4648                         rdata->nr_pages--;
4649                         continue;
4650                 }
4651
4652                 if (iter)
4653                         result = copy_page_from_iter(
4654                                         page, page_offset, n, iter);
4655 #ifdef CONFIG_CIFS_SMB_DIRECT
4656                 else if (rdata->mr)
4657                         result = n;
4658 #endif
4659                 else
4660                         result = cifs_read_page_from_socket(
4661                                         server, page, page_offset, n);
4662                 if (result < 0)
4663                         break;
4664
4665                 rdata->got_bytes += result;
4666         }
4667
4668         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4669                                                 rdata->got_bytes : result;
4670 }
4671
4672 static int
4673 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4674                                struct cifs_readdata *rdata, unsigned int len)
4675 {
4676         return readpages_fill_pages(server, rdata, NULL, len);
4677 }
4678
4679 static int
4680 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4681                                struct cifs_readdata *rdata,
4682                                struct iov_iter *iter)
4683 {
4684         return readpages_fill_pages(server, rdata, iter, iter->count);
4685 }
4686
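/*
 * Readahead entry point: satisfy what we can from fscache, then chop the
 * remainder into rsize-sized async read requests.
 */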
4687 static void cifs_readahead(struct readahead_control *ractl)
4688 {
4689         int rc;
4690         struct cifsFileInfo *open_file = ractl->file->private_data;
4691         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4692         struct TCP_Server_Info *server;
4693         pid_t pid;
4694         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4695         pgoff_t next_cached = ULONG_MAX;
4696         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4697                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4698         bool check_cache = caching;
4699
4700         xid = get_xid();
4701
4702         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4703                 pid = open_file->pid;
4704         else
4705                 pid = current->tgid;
4706
4707         rc = 0;
4708         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4709
4710         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4711                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4712
4713         /*
4714          * Chop the readahead request up into rsize-sized read requests.
4715          */
4716         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4717                 unsigned int i, got, rsize;
4718                 struct page *page;
4719                 struct cifs_readdata *rdata;
4720                 struct cifs_credits credits_on_stack;
4721                 struct cifs_credits *credits = &credits_on_stack;
4722                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4723
4724                 /*
4725                  * Find out if we have anything cached in the range of
4726                  * interest, and if so, where the next chunk of cached data is.
4727                  */
4728                 if (caching) {
4729                         if (check_cache) {
4730                                 rc = cifs_fscache_query_occupancy(
4731                                         ractl->mapping->host, index, nr_pages,
4732                                         &next_cached, &cache_nr_pages);
4733                                 if (rc < 0)
4734                                         caching = false;
4735                                 check_cache = false;
4736                         }
4737
4738                         if (index == next_cached) {
4739                                 /*
4740                                  * TODO: Send a whole batch of pages to be read
4741                                  * by the cache.
4742                                  */
4743                                 struct folio *folio = readahead_folio(ractl);
4744
4745                                 last_batch_size = folio_nr_pages(folio);
4746                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4747                                                                &folio->page) < 0) {
4748                                         /*
4749                                          * TODO: Deal with cache read failure
4750                                          * here, but for the moment, delegate
4751                                          * that to readpage.
4752                                          */
4753                                         caching = false;
4754                                 }
4755                                 folio_unlock(folio);
4756                                 next_cached++;
4757                                 cache_nr_pages--;
4758                                 if (cache_nr_pages == 0)
4759                                         check_cache = true;
4760                                 continue;
4761                         }
4762                 }
4763
4764                 if (open_file->invalidHandle) {
4765                         rc = cifs_reopen_file(open_file, true);
4766                         if (rc) {
4767                                 if (rc == -EAGAIN)
4768                                         continue;
4769                                 break;
4770                         }
4771                 }
4772
4773                 if (cifs_sb->ctx->rsize == 0)
4774                         cifs_sb->ctx->rsize =
4775                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4776                                                              cifs_sb->ctx);
4777
4778                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4779                                                    &rsize, credits);
4780                 if (rc)
4781                         break;
4782                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4783                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4784
4785                 /*
4786                  * Give up immediately if rsize is too small to read an entire
4787                  * page. The VFS will fall back to readpage. We should never
4788                  * reach this point, however, since we set ra_pages to 0 when
4789                  * rsize is smaller than a cache page.
4790                  */
4791                 if (unlikely(!nr_pages)) {
4792                         add_credits_and_wake_if(server, credits, 0);
4793                         break;
4794                 }
4795
4796                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4797                 if (!rdata) {
4798                         /* best to give up if we're out of mem */
4799                         add_credits_and_wake_if(server, credits, 0);
4800                         break;
4801                 }
4802
4803                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4804                 if (got != nr_pages) {
4805                         pr_warn("__readahead_batch() returned %u/%u\n",
4806                                 got, nr_pages);
4807                         nr_pages = got;
4808                 }
4809
4810                 rdata->nr_pages = nr_pages;
4811                 rdata->bytes    = readahead_batch_length(ractl);
4812                 rdata->cfile    = cifsFileInfo_get(open_file);
4813                 rdata->server   = server;
4814                 rdata->mapping  = ractl->mapping;
4815                 rdata->offset   = readahead_pos(ractl);
4816                 rdata->pid      = pid;
4817                 rdata->pagesz   = PAGE_SIZE;
4818                 rdata->tailsz   = PAGE_SIZE;
4819                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4820                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4821                 rdata->credits  = credits_on_stack;
4822
4823                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4824                 if (!rc) {
4825                         if (rdata->cfile->invalidHandle)
4826                                 rc = -EAGAIN;
4827                         else
4828                                 rc = server->ops->async_readv(rdata);
4829                 }
4830
4831                 if (rc) {
4832                         add_credits_and_wake_if(server, &rdata->credits, 0);
4833                         for (i = 0; i < rdata->nr_pages; i++) {
4834                                 page = rdata->pages[i];
4835                                 unlock_page(page);
4836                                 put_page(page);
4837                         }
4838                         /* Fall back to readpage in error/reconnect cases */
4839                         kref_put(&rdata->refcount, cifs_readdata_release);
4840                         break;
4841                 }
4842
4843                 kref_put(&rdata->refcount, cifs_readdata_release);
4844                 last_batch_size = nr_pages;
4845         }
4846
4847         free_xid(xid);
4848 }
4849
4850 /*
4851  * cifs_readpage_worker must be called with the page pinned
4852  */
4853 static int cifs_readpage_worker(struct file *file, struct page *page,
4854         loff_t *poffset)
4855 {
4856         char *read_data;
4857         int rc;
4858
4859         /* Is the page cached? */
4860         rc = cifs_readpage_from_fscache(file_inode(file), page);
4861         if (rc == 0)
4862                 goto read_complete;
4863
4864         read_data = kmap(page);
4865         /* for reads over a certain size we could initiate async read-ahead */
4866
4867         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4868
4869         if (rc < 0)
4870                 goto io_error;
4871         else
4872                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4873
4874         /* we do not want atime to be less than mtime; it broke some apps */
4875         file_inode(file)->i_atime = current_time(file_inode(file));
4876         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4877                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4878         else
4879                 file_inode(file)->i_atime = current_time(file_inode(file));
4880
4881         if (PAGE_SIZE > rc)
4882                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4883
4884         flush_dcache_page(page);
4885         SetPageUptodate(page);
4886
4887         /* send this page to the cache */
4888         cifs_readpage_to_fscache(file_inode(file), page);
4889
4890         rc = 0;
4891
4892 io_error:
4893         kunmap(page);
4894         unlock_page(page);
4895
4896 read_complete:
4897         return rc;
4898 }
4899
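/* Read a single folio: try fscache first, else read from the server. */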
4900 static int cifs_read_folio(struct file *file, struct folio *folio)
4901 {
4902         struct page *page = &folio->page;
4903         loff_t offset = page_file_offset(page);
4904         int rc = -EACCES;
4905         unsigned int xid;
4906
4907         xid = get_xid();
4908
4909         if (file->private_data == NULL) {
4910                 rc = -EBADF;
4911                 free_xid(xid);
4912                 return rc;
4913         }
4914
4915         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4916                  page, (int)offset, (int)offset);
4917
4918         rc = cifs_readpage_worker(file, page, &offset);
4919
4920         free_xid(xid);
4921         return rc;
4922 }
4923
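/* Return 1 if any open file on the inode was opened with write access. */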
4924 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4925 {
4926         struct cifsFileInfo *open_file;
4927
4928         spin_lock(&cifs_inode->open_file_lock);
4929         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4930                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4931                         spin_unlock(&cifs_inode->open_file_lock);
4932                         return 1;
4933                 }
4934         }
4935         spin_unlock(&cifs_inode->open_file_lock);
4936         return 0;
4937 }
4938
4939 /* We do not want to update the file size from the server for inodes
4940    open for write, to avoid races with writepage extending the file.
4941    In the future we could consider allowing a refresh of the inode
4942    only on increases in the file size, but this is tricky to do
4943    without racing with writebehind page caching in the current
4944    Linux kernel design. */
4945 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4946 {
4947         if (!cifsInode)
4948                 return true;
4949
4950         if (is_inode_writable(cifsInode)) {
4951                 /* This inode is open for write at least once */
4952                 struct cifs_sb_info *cifs_sb;
4953
4954                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4955                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4956                         /* since there is no page cache to corrupt on
4957                            direct I/O, we can change the size safely */
4958                         return true;
4959                 }
4960
4961                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4962                         return true;
4963
4964                 return false;
4965         } else
4966                 return true;
4967 }
4968
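/*
 * Prepare a page for a write: grab the cache page covering the region,
 * reading it in from the server when the write is partial and the existing
 * data may still be needed, so write_end can copy the new data into it.
 */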
4969 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4970                         loff_t pos, unsigned len,
4971                         struct page **pagep, void **fsdata)
4972 {
4973         int oncethru = 0;
4974         pgoff_t index = pos >> PAGE_SHIFT;
4975         loff_t offset = pos & (PAGE_SIZE - 1);
4976         loff_t page_start = pos & PAGE_MASK;
4977         loff_t i_size;
4978         struct page *page;
4979         int rc = 0;
4980
4981         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4982
4983 start:
4984         page = grab_cache_page_write_begin(mapping, index);
4985         if (!page) {
4986                 rc = -ENOMEM;
4987                 goto out;
4988         }
4989
4990         if (PageUptodate(page))
4991                 goto out;
4992
4993         /*
4994          * If we write a full page it will be up to date, no need to read from
4995          * the server. If the write is short, we'll end up doing a sync write
4996          * instead.
4997          */
4998         if (len == PAGE_SIZE)
4999                 goto out;
5000
5001         /*
5002          * optimize away the read when we have an oplock, and we're not
5003          * expecting to use any of the data we'd be reading in. That
5004          * is, when the page lies beyond the EOF, or straddles the EOF
5005          * and the write will cover all of the existing data.
5006          */
5007         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
5008                 i_size = i_size_read(mapping->host);
5009                 if (page_start >= i_size ||
5010                     (offset == 0 && (pos + len) >= i_size)) {
5011                         zero_user_segments(page, 0, offset,
5012                                            offset + len,
5013                                            PAGE_SIZE);
5014                         /*
5015                          * PageChecked means that the parts of the page
5016                          * to which we're not writing are considered up
5017                          * to date. Once the data is copied to the
5018                          * page, it can be set uptodate.
5019                          */
5020                         SetPageChecked(page);
5021                         goto out;
5022                 }
5023         }
5024
5025         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
5026                 /*
5027                  * might as well read a page, it is fast enough. If we get
5028                  * an error, we don't need to return it. cifs_write_end will
5029                  * do a sync write instead since PG_uptodate isn't set.
5030                  */
5031                 cifs_readpage_worker(file, page, &page_start);
5032                 put_page(page);
5033                 oncethru = 1;
5034                 goto start;
5035         } else {
5036                 /* We could try using another file handle if there is one,
5037                    but how would we lock it to prevent a close of that handle
5038                    from racing with this read? In any case the data will be
5039                    written out by write_end, so this is fine. */
5040         }
5041 out:
5042         *pagep = page;
5043         return rc;
5044 }
5045
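/*
 * Decide whether a folio can be released: refuse if it has private data,
 * and wait for (or refuse under) a pending fscache write depending on
 * whether we are allowed to block.
 */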
5046 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5047 {
5048         if (folio_test_private(folio))
5049                 return false;
5050         if (folio_test_fscache(folio)) {
5051                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5052                         return false;
5053                 folio_wait_fscache(folio);
5054         }
5055         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5056         return true;
5057 }
5058
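/* Part or all of the folio is being invalidated; wait for any pending
   fscache write so the data is not yanked out from under it. */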
5059 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5060                                  size_t length)
5061 {
5062         folio_wait_fscache(folio);
5063 }
5064
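/* Write a dirty folio back to the server before it is thrown away. */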
5065 static int cifs_launder_folio(struct folio *folio)
5066 {
5067         int rc = 0;
5068         loff_t range_start = folio_pos(folio);
5069         loff_t range_end = range_start + folio_size(folio);
5070         struct writeback_control wbc = {
5071                 .sync_mode = WB_SYNC_ALL,
5072                 .nr_to_write = 0,
5073                 .range_start = range_start,
5074                 .range_end = range_end,
5075         };
5076
5077         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5078
5079         if (folio_clear_dirty_for_io(folio))
5080                 rc = cifs_writepage_locked(&folio->page, &wbc);
5081
5082         folio_wait_fscache(folio);
5083         return rc;
5084 }
5085
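/*
 * Handle an oplock break from the server: downgrade the cached oplock
 * state, flush and possibly invalidate the page cache, push byte-range
 * locks to the server, and acknowledge the break.
 */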
5086 void cifs_oplock_break(struct work_struct *work)
5087 {
5088         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5089                                                   oplock_break);
5090         struct inode *inode = d_inode(cfile->dentry);
5091         struct cifsInodeInfo *cinode = CIFS_I(inode);
5092         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5093         struct TCP_Server_Info *server = tcon->ses->server;
5094         int rc = 0;
5095         bool purge_cache = false;
5096
5097         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5098                         TASK_UNINTERRUPTIBLE);
5099
5100         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5101                                       cfile->oplock_epoch, &purge_cache);
5102
5103         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5104                                                 cifs_has_mand_locks(cinode)) {
5105                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5106                          inode);
5107                 cinode->oplock = 0;
5108         }
5109
5110         if (inode && S_ISREG(inode->i_mode)) {
5111                 if (CIFS_CACHE_READ(cinode))
5112                         break_lease(inode, O_RDONLY);
5113                 else
5114                         break_lease(inode, O_WRONLY);
5115                 rc = filemap_fdatawrite(inode->i_mapping);
5116                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5117                         rc = filemap_fdatawait(inode->i_mapping);
5118                         mapping_set_error(inode->i_mapping, rc);
5119                         cifs_zap_mapping(inode);
5120                 }
5121                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5122                 if (CIFS_CACHE_WRITE(cinode))
5123                         goto oplock_break_ack;
5124         }
5125
5126         rc = cifs_push_locks(cfile);
5127         if (rc)
5128                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5129
5130 oplock_break_ack:
5131         /*
5132          * Releasing a stale oplock after a recent reconnect of the SMB
5133          * session, using a now incorrect file handle, is not a data
5134          * integrity issue. But don't bother sending an oplock release if
5135          * the session is still down, since the server already released it.
5136          */
5137         if (!cfile->oplock_break_cancelled) {
5138                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5139                                                              cinode);
5140                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5141         }
5142
5143         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5144         cifs_done_oplock_break(cinode);
5145 }
5146
5147 /*
5148  * The presence of cifs_direct_io() in the address space ops vector
5149  * allows the open() O_DIRECT flag, which would have failed otherwise.
5150  *
5151  * In non-cached mode (mount with cache=none), we shunt off direct read
5152  * and write requests, so this method should never be called.
5153  *
5154  * Direct I/O is not yet supported in cached mode.
5155  */
5156 static ssize_t
5157 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5158 {
5159         /*
5160          * FIXME
5161          * Eventually need to support direct IO for non forcedirectio mounts
5162          */
5163         return -EINVAL;
5164 }
5165
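/*
 * Activate a swap file on a CIFS mount: check that the file has no holes
 * and that the address space supports swap_rw, then register a single
 * swap extent covering the file.
 */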
5166 static int cifs_swap_activate(struct swap_info_struct *sis,
5167                               struct file *swap_file, sector_t *span)
5168 {
5169         struct cifsFileInfo *cfile = swap_file->private_data;
5170         struct inode *inode = swap_file->f_mapping->host;
5171         unsigned long blocks;
5172         long long isize;
5173
5174         cifs_dbg(FYI, "swap activate\n");
5175
5176         if (!swap_file->f_mapping->a_ops->swap_rw)
5177                 /* Cannot support swap */
5178                 return -EINVAL;
5179
5180         spin_lock(&inode->i_lock);
5181         blocks = inode->i_blocks;
5182         isize = inode->i_size;
5183         spin_unlock(&inode->i_lock);
5184         if (blocks * 512 < isize) {
5185                 pr_warn("swap activate: swapfile has holes\n");
5186                 return -EINVAL;
5187         }
5188         *span = sis->pages;
5189
5190         pr_warn_once("Swap support over SMB3 is experimental\n");
5191
5192         /*
5193          * TODO: consider adding an ACL (or documenting how) to prevent other
5194          * users (on this or other systems) from reading it
5195          */
5196
5197
5198         /* TODO: add sk_set_memalloc(inet) or similar */
5199
5200         if (cfile)
5201                 cfile->swapfile = true;
5202         /*
5203          * TODO: Since file already open, we can't open with DENY_ALL here
5204          * but we could add call to grab a byte range lock to prevent others
5205          * from reading or writing the file
5206          */
5207
5208         sis->flags |= SWP_FS_OPS;
5209         return add_swap_extent(sis, 0, sis->max, 0);
5210 }
5211
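/* Undo cifs_swap_activate() when swapoff is run on the file. */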
5212 static void cifs_swap_deactivate(struct file *file)
5213 {
5214         struct cifsFileInfo *cfile = file->private_data;
5215
5216         cifs_dbg(FYI, "swap deactivate\n");
5217
5218         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5219
5220         if (cfile)
5221                 cfile->swapfile = false;
5222
5223         /* do we need to unpin (or unlock) the file? */
5224 }
5225
5226 /*
5227  * Mark a page as having been made dirty and thus needing writeback.  We also
5228  * need to pin the cache object to write back to.
5229  */
5230 #ifdef CONFIG_CIFS_FSCACHE
5231 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5232 {
5233         return fscache_dirty_folio(mapping, folio,
5234                                         cifs_inode_cookie(mapping->host));
5235 }
5236 #else
5237 #define cifs_dirty_folio filemap_dirty_folio
5238 #endif
5239
5240 const struct address_space_operations cifs_addr_ops = {
5241         .read_folio = cifs_read_folio,
5242         .readahead = cifs_readahead,
5243         .writepages = cifs_writepages,
5244         .write_begin = cifs_write_begin,
5245         .write_end = cifs_write_end,
5246         .dirty_folio = cifs_dirty_folio,
5247         .release_folio = cifs_release_folio,
5248         .direct_IO = cifs_direct_io,
5249         .invalidate_folio = cifs_invalidate_folio,
5250         .launder_folio = cifs_launder_folio,
5251         .migrate_folio = filemap_migrate_folio,
5252         /*
5253          * TODO: investigate and if useful we could add an is_dirty_writeback
5254          * helper if needed
5255          */
5256         .swap_activate = cifs_swap_activate,
5257         .swap_deactivate = cifs_swap_deactivate,
5258 };
5259
5260 /*
5261  * cifs_readahead requires the server to support a buffer large enough to
5262  * contain the header plus one complete page of data.  Otherwise, we need
5263  * to leave cifs_readahead out of the address space operations.
5264  */
5265 const struct address_space_operations cifs_addr_ops_smallbuf = {
5266         .read_folio = cifs_read_folio,
5267         .writepages = cifs_writepages,
5268         .write_begin = cifs_write_begin,
5269         .write_end = cifs_write_end,
5270         .dirty_folio = cifs_dirty_folio,
5271         .release_folio = cifs_release_folio,
5272         .invalidate_folio = cifs_invalidate_folio,
5273         .launder_folio = cifs_launder_folio,
5274         .migrate_folio = filemap_migrate_folio,
5275 };