/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

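/*
 * Map the POSIX access mode bits (O_RDONLY/O_WRONLY/O_RDWR) to the
 * desired-access flags requested from the server at open time.
 */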
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/*
		 * GENERIC_ALL is too much permission to request; it can
		 * cause unnecessary access-denied errors on create.
		 */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

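/*
 * Choose the SMB create disposition that corresponds to the POSIX
 * O_CREAT/O_EXCL/O_TRUNC combination (see the mapping table in
 * cifs_nt_open()).
 */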
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

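/*
 * Open (and optionally create) a file using the SMB unix/POSIX extensions.
 * On success the new handle is returned through @pnetfid; if @pinode is
 * non-NULL, the inode is created or refreshed from the returned metadata.
 */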
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

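/*
 * Open a file the traditional (non-POSIX) way via the server's ->open op,
 * then refresh the inode from the metadata returned with the open response.
 */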
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct match for the disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates an existing
 *	file rather than creating a new one as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag,
 *	and the read/write flags match reasonably.  O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

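/*
 * Return true if any open instance of this inode currently holds at least
 * one cached byte-range (mandatory brlock) lock.
 */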
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

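/*
 * Take the write side of the semaphore by polling with trylock and
 * sleeping between attempts, instead of blocking in down_write().
 */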
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

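/*
 * Allocate and initialize the per-open-file private data (cifsFileInfo),
 * link it into the tcon and inode open-file lists, and record the oplock
 * state granted by the server.
 */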
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

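/* Take an extra reference on the file's private data; paired with cifsFileInfo_put(). */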
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to a file's private data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to a file's private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

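/*
 * ->open() for regular files: try a POSIX open when the server supports the
 * unix extensions, otherwise fall back to an NT-style open, then attach the
 * resulting cifsFileInfo to the struct file.
 */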
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

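/*
 * Reopen cfile's server handle after it was invalidated (e.g. by a
 * reconnect). When @can_flush is true, dirty pages are written back and
 * the inode info is refreshed before the handle is reused.
 */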
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab the rename sem here because various ops, including
	 * those that already hold it, can end up causing writepage to get
	 * called, and if the server was down that means we end up here. We
	 * can never tell whether the caller already holds the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * in the reconnect path especially, it is important to
		 * retry hard.
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh the inode by passing in a file_info buf to be
	 * returned by ops->open and then calling get_inode_info with the
	 * returned buf, since the file might have write-behind data that
	 * needs to be flushed and the server's version of the file size can
	 * be stale. If we knew for sure that the inode was not dirty locally
	 * we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to the server already and could
	 * deadlock if we tried to flush data; and since we do not know
	 * whether we have data that would invalidate the current end of
	 * file on the server, we can not go to the server to get the new
	 * inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		_cifsFileInfo_put(file->private_data, true, false);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen persistent handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

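/*
 * Allocate and initialize a cifsLockInfo for the given byte range, owned
 * by the current thread group.
 */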
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

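/* Remove and wake up every lock that is blocked waiting on this one. */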
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}

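/*
 * Send all cached mandatory byte-range locks for this fid to the server,
 * batching as many LOCKING_ANDX ranges per request as the server's maxBuf
 * allows, one pass per lock type.
 */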
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

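/*
 * Hash the lock owner pointer with a per-boot secret to derive the lock
 * "pid" sent to the server, so raw kernel pointers do not go on the wire.
 */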
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

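/*
 * Snapshot of a POSIX lock, queued so it can be sent to the server
 * without holding flc_lock.
 */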
struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;	/* advance to the next preallocated entry */
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

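/*
 * Handle F_GETLK-style lock testing: check for conflicts locally first
 * and, if the answer must come from the server, probe by temporarily
 * acquiring and then releasing the range.
 */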
1434 static int
1435 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1436            bool wait_flag, bool posix_lck, unsigned int xid)
1437 {
1438         int rc = 0;
1439         __u64 length = 1 + flock->fl_end - flock->fl_start;
1440         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1441         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1442         struct TCP_Server_Info *server = tcon->ses->server;
1443         __u16 netfid = cfile->fid.netfid;
1444
1445         if (posix_lck) {
1446                 int posix_lock_type;
1447
1448                 rc = cifs_posix_lock_test(file, flock);
1449                 if (!rc)
1450                         return rc;
1451
1452                 if (type & server->vals->shared_lock_type)
1453                         posix_lock_type = CIFS_RDLCK;
1454                 else
1455                         posix_lock_type = CIFS_WRLCK;
1456                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1457                                       hash_lockowner(flock->fl_owner),
1458                                       flock->fl_start, length, flock,
1459                                       posix_lock_type, wait_flag);
1460                 return rc;
1461         }
1462
1463         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1464         if (!rc)
1465                 return rc;
1466
1467         /* BB we could chain these into one lock request BB */
1468         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1469                                     1, 0, false);
1470         if (rc == 0) {
1471                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1472                                             type, 0, 1, false);
1473                 flock->fl_type = F_UNLCK;
1474                 if (rc != 0)
1475                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1476                                  rc);
1477                 return 0;
1478         }
1479
1480         if (type & server->vals->shared_lock_type) {
1481                 flock->fl_type = F_WRLCK;
1482                 return 0;
1483         }
1484
1485         type &= ~server->vals->exclusive_lock_type;
1486
1487         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1488                                     type | server->vals->shared_lock_type,
1489                                     1, 0, false);
1490         if (rc == 0) {
1491                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1492                         type | server->vals->shared_lock_type, 0, 1, false);
1493                 flock->fl_type = F_RDLCK;
1494                 if (rc != 0)
1495                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1496                                  rc);
1497         } else
1498                 flock->fl_type = F_WRLCK;
1499
1500         return 0;
1501 }
1502
1503 void
1504 cifs_move_llist(struct list_head *source, struct list_head *dest)
1505 {
1506         struct list_head *li, *tmp;
1507         list_for_each_safe(li, tmp, source)
1508                 list_move(li, dest);
1509 }
1510
1511 void
1512 cifs_free_llist(struct list_head *llist)
1513 {
1514         struct cifsLockInfo *li, *tmp;
1515         list_for_each_entry_safe(li, tmp, llist, llist) {
1516                 cifs_del_lock_waiters(li);
1517                 list_del(&li->llist);
1518                 kfree(li);
1519         }
1520 }
1521
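/*
 * Unlock a byte range described by flock.  Matching entries from the
 * file's cached lock list are batched (at most max_num per request) into
 * a LOCKING_ANDX_RANGE array and sent with cifs_lockv(); each entry is
 * parked on tmp_llist first so it can be put back on the file's list if
 * the server rejects the unlock.  While brlocks are still cached locally
 * the entries are simply deleted without any network traffic.
 */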
1522 int
1523 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1524                   unsigned int xid)
1525 {
1526         int rc = 0, stored_rc;
1527         static const int types[] = {
1528                 LOCKING_ANDX_LARGE_FILES,
1529                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1530         };
1531         unsigned int i;
1532         unsigned int max_num, num, max_buf;
1533         LOCKING_ANDX_RANGE *buf, *cur;
1534         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1535         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1536         struct cifsLockInfo *li, *tmp;
1537         __u64 length = 1 + flock->fl_end - flock->fl_start;
1538         struct list_head tmp_llist;
1539
1540         INIT_LIST_HEAD(&tmp_llist);
1541
1542         /*
1543          * Accessing maxBuf is racy with cifs_reconnect - we need to store the
1544          * value and check it before use.
1545          */
1546         max_buf = tcon->ses->server->maxBuf;
1547         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1548                 return -EINVAL;
1549
1550         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1551                      PAGE_SIZE);
1552         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1553                         PAGE_SIZE);
1554         max_num = (max_buf - sizeof(struct smb_hdr)) /
1555                                                 sizeof(LOCKING_ANDX_RANGE);
1556         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1557         if (!buf)
1558                 return -ENOMEM;
1559
1560         cifs_down_write(&cinode->lock_sem);
1561         for (i = 0; i < 2; i++) {
1562                 cur = buf;
1563                 num = 0;
1564                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1565                         if (flock->fl_start > li->offset ||
1566                             (flock->fl_start + length) <
1567                             (li->offset + li->length))
1568                                 continue;
1569                         if (current->tgid != li->pid)
1570                                 continue;
1571                         if (types[i] != li->type)
1572                                 continue;
1573                         if (cinode->can_cache_brlcks) {
1574                                 /*
1575                                  * We can cache brlock requests - simply remove
1576                                  * a lock from the file's list.
1577                                  */
1578                                 list_del(&li->llist);
1579                                 cifs_del_lock_waiters(li);
1580                                 kfree(li);
1581                                 continue;
1582                         }
1583                         cur->Pid = cpu_to_le16(li->pid);
1584                         cur->LengthLow = cpu_to_le32((u32)li->length);
1585                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1586                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1587                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1588                         /*
1589                          * We need to save a lock here to let us add it again to
1590                          * the file's list if the unlock range request fails on
1591                          * the server.
1592                          */
1593                         list_move(&li->llist, &tmp_llist);
1594                         if (++num == max_num) {
1595                                 stored_rc = cifs_lockv(xid, tcon,
1596                                                        cfile->fid.netfid,
1597                                                        li->type, num, 0, buf);
1598                                 if (stored_rc) {
1599                                         /*
1600                                          * We failed on the unlock range
1601                                          * request - add all locks from the tmp
1602                                          * list to the head of the file's list.
1603                                          */
1604                                         cifs_move_llist(&tmp_llist,
1605                                                         &cfile->llist->locks);
1606                                         rc = stored_rc;
1607                                 } else
1608                                         /*
1609                                          * The unlock range request succeeded -
1610                                          * free the tmp list.
1611                                          */
1612                                         cifs_free_llist(&tmp_llist);
1613                                 cur = buf;
1614                                 num = 0;
1615                         } else
1616                                 cur++;
1617                 }
1618                 if (num) {
1619                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1620                                                types[i], num, 0, buf);
1621                         if (stored_rc) {
1622                                 cifs_move_llist(&tmp_llist,
1623                                                 &cfile->llist->locks);
1624                                 rc = stored_rc;
1625                         } else
1626                                 cifs_free_llist(&tmp_llist);
1627                 }
1628         }
1629
1630         up_write(&cinode->lock_sem);
1631         kfree(buf);
1632         return rc;
1633 }
1634
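/*
 * Handle F_SETLK/F_SETLKW: acquire or release a byte-range lock.  POSIX
 * locks go over the wire with CIFSSMBPosixLock(); mandatory locks use the
 * server->ops lock methods and are added to the file's lock list so that
 * cifs_push_locks() can replay them later.  For FL_POSIX/FL_FLOCK
 * requests the result is also recorded locally via locks_lock_file_wait().
 */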
1635 static int
1636 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1637            bool wait_flag, bool posix_lck, int lock, int unlock,
1638            unsigned int xid)
1639 {
1640         int rc = 0;
1641         __u64 length = 1 + flock->fl_end - flock->fl_start;
1642         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1643         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1644         struct TCP_Server_Info *server = tcon->ses->server;
1645         struct inode *inode = d_inode(cfile->dentry);
1646
1647         if (posix_lck) {
1648                 int posix_lock_type;
1649
1650                 rc = cifs_posix_lock_set(file, flock);
1651                 if (rc <= 0)
1652                         return rc;
1653
1654                 if (type & server->vals->shared_lock_type)
1655                         posix_lock_type = CIFS_RDLCK;
1656                 else
1657                         posix_lock_type = CIFS_WRLCK;
1658
1659                 if (unlock == 1)
1660                         posix_lock_type = CIFS_UNLCK;
1661
1662                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1663                                       hash_lockowner(flock->fl_owner),
1664                                       flock->fl_start, length,
1665                                       NULL, posix_lock_type, wait_flag);
1666                 goto out;
1667         }
1668
1669         if (lock) {
1670                 struct cifsLockInfo *lock;
1671
1672                 lock = cifs_lock_init(flock->fl_start, length, type,
1673                                       flock->fl_flags);
1674                 if (!lock)
1675                         return -ENOMEM;
1676
1677                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1678                 if (rc < 0) {
1679                         kfree(lock);
1680                         return rc;
1681                 }
1682                 if (!rc)
1683                         goto out;
1684
1685                 /*
1686                  * Windows 7 server can delay breaking lease from read to None
1687                  * if we set a byte-range lock on a file - break it explicitly
1688                  * before sending the lock to the server to be sure the next
1689                  * read won't conflict with non-overlapping locks due to
1690                  * page reading.
1691                  */
1692                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1693                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1694                         cifs_zap_mapping(inode);
1695                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1696                                  inode);
1697                         CIFS_I(inode)->oplock = 0;
1698                 }
1699
1700                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1701                                             type, 1, 0, wait_flag);
1702                 if (rc) {
1703                         kfree(lock);
1704                         return rc;
1705                 }
1706
1707                 cifs_lock_add(cfile, lock);
1708         } else if (unlock)
1709                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1710
1711 out:
1712         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1713                 /*
1714                  * If this is a request to remove all locks because we
1715                  * are closing the file, it doesn't matter if the
1716                  * unlocking failed as both cifs.ko and the SMB server
1717                  * remove the lock on file close
1718                  */
1719                 if (rc) {
1720                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1721                         if (!(flock->fl_flags & FL_CLOSE))
1722                                 return rc;
1723                 }
1724                 rc = locks_lock_file_wait(file, flock);
1725         }
1726         return rc;
1727 }
1728
1729 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1730 {
1731         int rc, xid;
1732         int lock = 0, unlock = 0;
1733         bool wait_flag = false;
1734         bool posix_lck = false;
1735         struct cifs_sb_info *cifs_sb;
1736         struct cifs_tcon *tcon;
1737         struct cifsFileInfo *cfile;
1738         __u32 type;
1739
1740         rc = -EACCES;
1741         xid = get_xid();
1742
1743         if (!(fl->fl_flags & FL_FLOCK)) {
1744                 free_xid(xid);
                     return -ENOLCK;
             }
1745
1746         cfile = (struct cifsFileInfo *)file->private_data;
1747         tcon = tlink_tcon(cfile->tlink);
1748
1749         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1750                         tcon->ses->server);
1751         cifs_sb = CIFS_FILE_SB(file);
1752
1753         if (cap_unix(tcon->ses) &&
1754             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1755             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1756                 posix_lck = true;
1757
1758         if (!lock && !unlock) {
1759                 /*
1760                  * if no lock or unlock then nothing to do since we do not
1761                  * know what it is
1762                  */
1763                 free_xid(xid);
1764                 return -EOPNOTSUPP;
1765         }
1766
1767         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1768                         xid);
1769         free_xid(xid);
1770         return rc;
1773 }
1774
1775 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1776 {
1777         int rc, xid;
1778         int lock = 0, unlock = 0;
1779         bool wait_flag = false;
1780         bool posix_lck = false;
1781         struct cifs_sb_info *cifs_sb;
1782         struct cifs_tcon *tcon;
1783         struct cifsFileInfo *cfile;
1784         __u32 type;
1785
1786         rc = -EACCES;
1787         xid = get_xid();
1788
1789         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1790                  cmd, flock->fl_flags, flock->fl_type,
1791                  flock->fl_start, flock->fl_end);
1792
1793         cfile = (struct cifsFileInfo *)file->private_data;
1794         tcon = tlink_tcon(cfile->tlink);
1795
1796         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1797                         tcon->ses->server);
1798         cifs_sb = CIFS_FILE_SB(file);
1799
1800         if (cap_unix(tcon->ses) &&
1801             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1802             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1803                 posix_lck = true;
1804         /*
1805          * BB add code here to normalize offset and length to account for
1806          * negative length which we can not accept over the wire.
1807          */
1808         if (IS_GETLK(cmd)) {
1809                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1810                 free_xid(xid);
1811                 return rc;
1812         }
1813
1814         if (!lock && !unlock) {
1815                 /*
1816                  * if no lock or unlock then nothing to do since we do not
1817                  * know what it is
1818                  */
1819                 free_xid(xid);
1820                 return -EOPNOTSUPP;
1821         }
1822
1823         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1824                         xid);
1825         free_xid(xid);
1826         return rc;
1827 }
1828
1829 /*
1830  * update the file size (if needed) after a write. Should be called with
1831  * the inode->i_lock held
1832  */
1833 void
1834 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1835                       unsigned int bytes_written)
1836 {
1837         loff_t end_of_write = offset + bytes_written;
1838
1839         if (end_of_write > cifsi->server_eof)
1840                 cifsi->server_eof = end_of_write;
1841 }
1842
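/*
 * Write write_size bytes from write_data at *offset synchronously,
 * retrying on -EAGAIN and reopening an invalidated handle as needed.
 * Each pass sends at most wp_retry_size() bytes; on success the cached
 * server EOF and the local inode size are updated and *offset advances.
 * Returns the number of bytes written, or an error if nothing was.
 */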
1843 static ssize_t
1844 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1845            size_t write_size, loff_t *offset)
1846 {
1847         int rc = 0;
1848         unsigned int bytes_written = 0;
1849         unsigned int total_written;
1850         struct cifs_tcon *tcon;
1851         struct TCP_Server_Info *server;
1852         unsigned int xid;
1853         struct dentry *dentry = open_file->dentry;
1854         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1855         struct cifs_io_parms io_parms;
1856
1857         cifs_dbg(FYI, "write %zu bytes to offset %lld of %pd\n",
1858                  write_size, *offset, dentry);
1859
1860         tcon = tlink_tcon(open_file->tlink);
1861         server = tcon->ses->server;
1862
1863         if (!server->ops->sync_write)
1864                 return -ENOSYS;
1865
1866         xid = get_xid();
1867
1868         for (total_written = 0; write_size > total_written;
1869              total_written += bytes_written) {
1870                 rc = -EAGAIN;
1871                 while (rc == -EAGAIN) {
1872                         struct kvec iov[2];
1873                         unsigned int len;
1874
1875                         if (open_file->invalidHandle) {
1876                                 /* we could deadlock if we called
1877                                    filemap_fdatawait from here so tell
1878                                    reopen_file not to flush data to
1879                                    server now */
1880                                 rc = cifs_reopen_file(open_file, false);
1881                                 if (rc != 0)
1882                                         break;
1883                         }
1884
1885                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1886                                   (unsigned int)write_size - total_written);
1887                         /* iov[0] is reserved for smb header */
1888                         iov[1].iov_base = (char *)write_data + total_written;
1889                         iov[1].iov_len = len;
1890                         io_parms.pid = pid;
1891                         io_parms.tcon = tcon;
1892                         io_parms.offset = *offset;
1893                         io_parms.length = len;
1894                         rc = server->ops->sync_write(xid, &open_file->fid,
1895                                         &io_parms, &bytes_written, iov, 1);
1896                 }
1897                 if (rc || (bytes_written == 0)) {
1898                         if (total_written)
1899                                 break;
1900                         else {
1901                                 free_xid(xid);
1902                                 return rc;
1903                         }
1904                 } else {
1905                         spin_lock(&d_inode(dentry)->i_lock);
1906                         cifs_update_eof(cifsi, *offset, bytes_written);
1907                         spin_unlock(&d_inode(dentry)->i_lock);
1908                         *offset += bytes_written;
1909                 }
1910         }
1911
1912         cifs_stats_bytes_written(tcon, total_written);
1913
1914         if (total_written > 0) {
1915                 spin_lock(&d_inode(dentry)->i_lock);
1916                 if (*offset > d_inode(dentry)->i_size)
1917                         i_size_write(d_inode(dentry), *offset);
1918                 spin_unlock(&d_inode(dentry)->i_lock);
1919         }
1920         mark_inode_dirty_sync(d_inode(dentry));
1921         free_xid(xid);
1922         return total_written;
1923 }
1924
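/*
 * Find an open handle on this inode that permits reading and take a
 * reference on it so it cannot be closed underneath the caller.
 * Returns NULL if no valid readable handle exists.
 */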
1925 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1926                                         bool fsuid_only)
1927 {
1928         struct cifsFileInfo *open_file = NULL;
1929         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1930
1931         /* only filter by fsuid on multiuser mounts */
1932         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1933                 fsuid_only = false;
1934
1935         spin_lock(&cifs_inode->open_file_lock);
1936         /* we could simply get the first list entry since write-only entries
1937            are always at the end of the list but since the first entry might
1938            have a close pending, we go through the whole list */
1939         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1940                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1941                         continue;
1942                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1943                         if (!open_file->invalidHandle) {
1944                                 /* found a good file */
1945                                 /* lock it so it will not be closed on us */
1946                                 cifsFileInfo_get(open_file);
1947                                 spin_unlock(&cifs_inode->open_file_lock);
1948                                 return open_file;
1949                         } /* else might as well continue, and look for
1950                              another, or simply have the caller reopen it
1951                              again rather than trying to fix this handle */
1952                 } else /* write only file */
1953                         break; /* write only files are last so must be done */
1954         }
1955         spin_unlock(&cifs_inode->open_file_lock);
1956         return NULL;
1957 }
1958
1959 /*
 * Return 0 and set *ret_file on success, -EBADF if no handle is found,
 * or another error code (e.g. from reopening an invalidated handle).
 */
1960 int
1961 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1962                        struct cifsFileInfo **ret_file)
1963 {
1964         struct cifsFileInfo *open_file, *inv_file = NULL;
1965         struct cifs_sb_info *cifs_sb;
1966         bool any_available = false;
1967         int rc = -EBADF;
1968         unsigned int refind = 0;
1969         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1970         bool with_delete = flags & FIND_WR_WITH_DELETE;
1971         *ret_file = NULL;
1972
1973         /*
1974          * Having a null inode here (because mapping->host was set to zero by
1975          * the VFS or MM) should not happen but we had reports of an oops (due
1976          * to it being zero) during stress testcases, so we need to check for it
1977          */
1978
1979         if (cifs_inode == NULL) {
1980                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1981                 dump_stack();
1982                 return rc;
1983         }
1984
1985         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1986
1987         /* only filter by fsuid on multiuser mounts */
1988         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1989                 fsuid_only = false;
1990
1991         spin_lock(&cifs_inode->open_file_lock);
1992 refind_writable:
1993         if (refind > MAX_REOPEN_ATT) {
1994                 spin_unlock(&cifs_inode->open_file_lock);
1995                 return rc;
1996         }
1997         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1998                 if (!any_available && open_file->pid != current->tgid)
1999                         continue;
2000                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2001                         continue;
2002                 if (with_delete && !(open_file->fid.access & DELETE))
2003                         continue;
2004                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2005                         if (!open_file->invalidHandle) {
2006                                 /* found a good writable file */
2007                                 cifsFileInfo_get(open_file);
2008                                 spin_unlock(&cifs_inode->open_file_lock);
2009                                 *ret_file = open_file;
2010                                 return 0;
2011                         } else {
2012                                 if (!inv_file)
2013                                         inv_file = open_file;
2014                         }
2015                 }
2016         }
2017         /* couldn't find usable FH with same pid, try any available */
2018         if (!any_available) {
2019                 any_available = true;
2020                 goto refind_writable;
2021         }
2022
2023         if (inv_file) {
2024                 any_available = false;
2025                 cifsFileInfo_get(inv_file);
2026         }
2027
2028         spin_unlock(&cifs_inode->open_file_lock);
2029
2030         if (inv_file) {
2031                 rc = cifs_reopen_file(inv_file, false);
2032                 if (!rc) {
2033                         *ret_file = inv_file;
2034                         return 0;
2035                 }
2036
2037                 spin_lock(&cifs_inode->open_file_lock);
2038                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2039                 spin_unlock(&cifs_inode->open_file_lock);
2040                 cifsFileInfo_put(inv_file);
2041                 ++refind;
2042                 inv_file = NULL;
2043                 spin_lock(&cifs_inode->open_file_lock);
2044                 goto refind_writable;
2045         }
2046
2047         return rc;
2048 }
2049
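/*
 * Wrapper around cifs_get_writable_file() that hands back the handle
 * itself (or NULL), logging the failure rather than returning an error.
 */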
2050 struct cifsFileInfo *
2051 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2052 {
2053         struct cifsFileInfo *cfile;
2054         int rc;
2055
2056         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2057         if (rc)
2058                 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
2059
2060         return cfile;
2061 }
2062
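/*
 * Find a writable handle by path rather than by inode: walk the tcon's
 * open file list, rebuild each entry's path from its dentry, and on a
 * match defer to cifs_get_writable_file() for that inode.
 */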
2063 int
2064 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2065                        int flags,
2066                        struct cifsFileInfo **ret_file)
2067 {
2068         struct list_head *tmp;
2069         struct cifsFileInfo *cfile;
2070         struct cifsInodeInfo *cinode;
2071         char *full_path;
2072
2073         *ret_file = NULL;
2074
2075         spin_lock(&tcon->open_file_lock);
2076         list_for_each(tmp, &tcon->openFileList) {
2077                 cfile = list_entry(tmp, struct cifsFileInfo,
2078                              tlist);
2079                 full_path = build_path_from_dentry(cfile->dentry);
2080                 if (full_path == NULL) {
2081                         spin_unlock(&tcon->open_file_lock);
2082                         return -ENOMEM;
2083                 }
2084                 if (strcmp(full_path, name)) {
2085                         kfree(full_path);
2086                         continue;
2087                 }
2088
2089                 kfree(full_path);
2090                 cinode = CIFS_I(d_inode(cfile->dentry));
2091                 spin_unlock(&tcon->open_file_lock);
2092                 return cifs_get_writable_file(cinode, flags, ret_file);
2093         }
2094
2095         spin_unlock(&tcon->open_file_lock);
2096         return -ENOENT;
2097 }
2098
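/*
 * As cifs_get_writable_path(), but looks for a readable handle on the
 * named file; returns -ENOENT if nothing matching is currently open.
 */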
2099 int
2100 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2101                        struct cifsFileInfo **ret_file)
2102 {
2103         struct list_head *tmp;
2104         struct cifsFileInfo *cfile;
2105         struct cifsInodeInfo *cinode;
2106         char *full_path;
2107
2108         *ret_file = NULL;
2109
2110         spin_lock(&tcon->open_file_lock);
2111         list_for_each(tmp, &tcon->openFileList) {
2112                 cfile = list_entry(tmp, struct cifsFileInfo,
2113                              tlist);
2114                 full_path = build_path_from_dentry(cfile->dentry);
2115                 if (full_path == NULL) {
2116                         spin_unlock(&tcon->open_file_lock);
2117                         return -ENOMEM;
2118                 }
2119                 if (strcmp(full_path, name)) {
2120                         kfree(full_path);
2121                         continue;
2122                 }
2123
2124                 kfree(full_path);
2125                 cinode = CIFS_I(d_inode(cfile->dentry));
2126                 spin_unlock(&tcon->open_file_lock);
2127                 *ret_file = find_readable_file(cinode, 0);
2128                 return *ret_file ? 0 : -ENOENT;
2129         }
2130
2131         spin_unlock(&tcon->open_file_lock);
2132         return -ENOENT;
2133 }
2134
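/*
 * Write the byte range [from, to) of a cached page back to the server
 * through any available writable handle.  The range is clipped to the
 * current file size - this path never extends the file.
 */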
2135 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2136 {
2137         struct address_space *mapping = page->mapping;
2138         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2139         char *write_data;
2140         int rc = -EFAULT;
2141         int bytes_written = 0;
2142         struct inode *inode;
2143         struct cifsFileInfo *open_file;
2144
2145         if (!mapping || !mapping->host)
2146                 return -EFAULT;
2147
2148         inode = page->mapping->host;
2149
2150         offset += (loff_t)from;
2151         write_data = kmap(page);
2152         write_data += from;
2153
2154         if ((to > PAGE_SIZE) || (from > to)) {
2155                 kunmap(page);
2156                 return -EIO;
2157         }
2158
2159         /* racing with truncate? */
2160         if (offset > mapping->host->i_size) {
2161                 kunmap(page);
2162                 return 0; /* don't care */
2163         }
2164
2165         /* check to make sure that we are not extending the file */
2166         if (mapping->host->i_size - offset < (loff_t)to)
2167                 to = (unsigned)(mapping->host->i_size - offset);
2168
2169         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2170                                     &open_file);
2171         if (!rc) {
2172                 bytes_written = cifs_write(open_file, open_file->pid,
2173                                            write_data, to - from, &offset);
2174                 cifsFileInfo_put(open_file);
2175                 /* Does mm or vfs already set times? */
2176                 inode->i_atime = inode->i_mtime = current_time(inode);
2177                 if ((bytes_written > 0) && (offset))
2178                         rc = 0;
2179                 else if (bytes_written < 0)
2180                         rc = bytes_written;
2181                 else
2182                         rc = -EFAULT;
2183         } else {
2184                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2185                 if (!is_retryable_error(rc))
2186                         rc = -EIO;
2187         }
2188
2189         kunmap(page);
2190         return rc;
2191 }
2192
2193 static struct cifs_writedata *
2194 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2195                           pgoff_t end, pgoff_t *index,
2196                           unsigned int *found_pages)
2197 {
2198         struct cifs_writedata *wdata;
2199
2200         wdata = cifs_writedata_alloc((unsigned int)tofind,
2201                                      cifs_writev_complete);
2202         if (!wdata)
2203                 return NULL;
2204
2205         *found_pages = find_get_pages_range_tag(mapping, index, end,
2206                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2207         return wdata;
2208 }
2209
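/*
 * From the dirty pages found by wdata_alloc_and_fillpages(), lock and
 * revalidate a run of consecutive pages and mark each one for writeback.
 * The run stops at the first page that went away, is out of range, not
 * contiguous, or no longer dirty; the leftover pages are released.
 * Returns the number of pages actually prepared for the write.
 */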
2210 static unsigned int
2211 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2212                     struct address_space *mapping,
2213                     struct writeback_control *wbc,
2214                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2215 {
2216         unsigned int nr_pages = 0, i;
2217         struct page *page;
2218
2219         for (i = 0; i < found_pages; i++) {
2220                 page = wdata->pages[i];
2221                 /*
2222                  * At this point we hold neither the i_pages lock nor the
2223                  * page lock: the page may be truncated or invalidated
2224                  * (changing page->mapping to NULL), or even swizzled
2225                  * back from swapper_space to tmpfs file mapping
2226                  */
2227
2228                 if (nr_pages == 0)
2229                         lock_page(page);
2230                 else if (!trylock_page(page))
2231                         break;
2232
2233                 if (unlikely(page->mapping != mapping)) {
2234                         unlock_page(page);
2235                         break;
2236                 }
2237
2238                 if (!wbc->range_cyclic && page->index > end) {
2239                         *done = true;
2240                         unlock_page(page);
2241                         break;
2242                 }
2243
2244                 if (*next && (page->index != *next)) {
2245                         /* Not next consecutive page */
2246                         unlock_page(page);
2247                         break;
2248                 }
2249
2250                 if (wbc->sync_mode != WB_SYNC_NONE)
2251                         wait_on_page_writeback(page);
2252
2253                 if (PageWriteback(page) ||
2254                                 !clear_page_dirty_for_io(page)) {
2255                         unlock_page(page);
2256                         break;
2257                 }
2258
2259                 /*
2260                  * This actually clears the dirty bit in the radix tree.
2261                  * See cifs_writepage() for more commentary.
2262                  */
2263                 set_page_writeback(page);
2264                 if (page_offset(page) >= i_size_read(mapping->host)) {
2265                         *done = true;
2266                         unlock_page(page);
2267                         end_page_writeback(page);
2268                         break;
2269                 }
2270
2271                 wdata->pages[i] = page;
2272                 *next = page->index + 1;
2273                 ++nr_pages;
2274         }
2275
2276         /* reset index to refind any pages skipped */
2277         if (nr_pages == 0)
2278                 *index = wdata->pages[0]->index + 1;
2279
2280         /* put any pages we aren't going to use */
2281         for (i = nr_pages; i < found_pages; i++) {
2282                 put_page(wdata->pages[i]);
2283                 wdata->pages[i] = NULL;
2284         }
2285
2286         return nr_pages;
2287 }
2288
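/*
 * Fill in the remaining I/O parameters (offset, sizes, pid) for the
 * prepared pages and pass the request to the server's async write op,
 * after re-checking that the reserved credits still cover wdata->bytes.
 */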
2289 static int
2290 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2291                  struct address_space *mapping, struct writeback_control *wbc)
2292 {
2293         int rc;
2294         struct TCP_Server_Info *server =
2295                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2296
2297         wdata->sync_mode = wbc->sync_mode;
2298         wdata->nr_pages = nr_pages;
2299         wdata->offset = page_offset(wdata->pages[0]);
2300         wdata->pagesz = PAGE_SIZE;
2301         wdata->tailsz = min(i_size_read(mapping->host) -
2302                         page_offset(wdata->pages[nr_pages - 1]),
2303                         (loff_t)PAGE_SIZE);
2304         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2305         wdata->pid = wdata->cfile->pid;
2306
2307         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2308         if (rc)
2309                 return rc;
2310
2311         if (wdata->cfile->invalidHandle)
2312                 rc = -EAGAIN;
2313         else
2314                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2315
2316         return rc;
2317 }
2318
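/*
 * address_space writepages method: repeatedly gather up to wsize worth
 * of contiguous dirty pages and send each batch as one async write,
 * taking flow-control credits for every batch.  Falls back to
 * generic_writepages() (one page at a time) when wsize is smaller than
 * the page size.
 */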
2319 static int cifs_writepages(struct address_space *mapping,
2320                            struct writeback_control *wbc)
2321 {
2322         struct inode *inode = mapping->host;
2323         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2324         struct TCP_Server_Info *server;
2325         bool done = false, scanned = false, range_whole = false;
2326         pgoff_t end, index;
2327         struct cifs_writedata *wdata;
2328         struct cifsFileInfo *cfile = NULL;
2329         int rc = 0;
2330         int saved_rc = 0;
2331         unsigned int xid;
2332
2333         /*
2334          * If wsize is smaller than the page cache size, default to writing
2335          * one page at a time via cifs_writepage
2336          */
2337         if (cifs_sb->wsize < PAGE_SIZE)
2338                 return generic_writepages(mapping, wbc);
2339
2340         xid = get_xid();
2341         if (wbc->range_cyclic) {
2342                 index = mapping->writeback_index; /* Start from prev offset */
2343                 end = -1;
2344         } else {
2345                 index = wbc->range_start >> PAGE_SHIFT;
2346                 end = wbc->range_end >> PAGE_SHIFT;
2347                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2348                         range_whole = true;
2349                 scanned = true;
2350         }
2351         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2352 retry:
2353         while (!done && index <= end) {
2354                 unsigned int i, nr_pages, found_pages, wsize;
2355                 pgoff_t next = 0, tofind, saved_index = index;
2356                 struct cifs_credits credits_on_stack;
2357                 struct cifs_credits *credits = &credits_on_stack;
2358                 int get_file_rc = 0;
2359
2360                 if (cfile)
2361                         cifsFileInfo_put(cfile);
2362
2363                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2364
2365                 /* in case of an error store it to return later */
2366                 if (rc)
2367                         get_file_rc = rc;
2368
2369                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2370                                                    &wsize, credits);
2371                 if (rc != 0) {
2372                         done = true;
2373                         break;
2374                 }
2375
2376                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2377
2378                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2379                                                   &found_pages);
2380                 if (!wdata) {
2381                         rc = -ENOMEM;
2382                         done = true;
2383                         add_credits_and_wake_if(server, credits, 0);
2384                         break;
2385                 }
2386
2387                 if (found_pages == 0) {
2388                         kref_put(&wdata->refcount, cifs_writedata_release);
2389                         add_credits_and_wake_if(server, credits, 0);
2390                         break;
2391                 }
2392
2393                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2394                                                end, &index, &next, &done);
2395
2396                 /* nothing to write? */
2397                 if (nr_pages == 0) {
2398                         kref_put(&wdata->refcount, cifs_writedata_release);
2399                         add_credits_and_wake_if(server, credits, 0);
2400                         continue;
2401                 }
2402
2403                 wdata->credits = credits_on_stack;
2404                 wdata->cfile = cfile;
2405                 cfile = NULL;
2406
2407                 if (!wdata->cfile) {
2408                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2409                                  get_file_rc);
2410                         if (is_retryable_error(get_file_rc))
2411                                 rc = get_file_rc;
2412                         else
2413                                 rc = -EBADF;
2414                 } else
2415                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2416
2417                 for (i = 0; i < nr_pages; ++i)
2418                         unlock_page(wdata->pages[i]);
2419
2420                 /* send failure -- clean up the mess */
2421                 if (rc != 0) {
2422                         add_credits_and_wake_if(server, &wdata->credits, 0);
2423                         for (i = 0; i < nr_pages; ++i) {
2424                                 if (is_retryable_error(rc))
2425                                         redirty_page_for_writepage(wbc,
2426                                                            wdata->pages[i]);
2427                                 else
2428                                         SetPageError(wdata->pages[i]);
2429                                 end_page_writeback(wdata->pages[i]);
2430                                 put_page(wdata->pages[i]);
2431                         }
2432                         if (!is_retryable_error(rc))
2433                                 mapping_set_error(mapping, rc);
2434                 }
2435                 kref_put(&wdata->refcount, cifs_writedata_release);
2436
2437                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2438                         index = saved_index;
2439                         continue;
2440                 }
2441
2442                 /* Return immediately if we received a signal during writing */
2443                 if (is_interrupt_error(rc)) {
2444                         done = true;
2445                         break;
2446                 }
2447
2448                 if (rc != 0 && saved_rc == 0)
2449                         saved_rc = rc;
2450
2451                 wbc->nr_to_write -= nr_pages;
2452                 if (wbc->nr_to_write <= 0)
2453                         done = true;
2454
2455                 index = next;
2456         }
2457
2458         if (!scanned && !done) {
2459                 /*
2460                  * We hit the last page and there is more work to be done: wrap
2461                  * back to the start of the file
2462                  */
2463                 scanned = true;
2464                 index = 0;
2465                 goto retry;
2466         }
2467
2468         if (saved_rc != 0)
2469                 rc = saved_rc;
2470
2471         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2472                 mapping->writeback_index = index;
2473
2474         if (cfile)
2475                 cifsFileInfo_put(cfile);
2476         free_xid(xid);
2477         return rc;
2478 }
2479
2480 static int
2481 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2482 {
2483         int rc;
2484         unsigned int xid;
2485
2486         xid = get_xid();
2487 /* BB add check for wbc flags */
2488         get_page(page);
2489         if (!PageUptodate(page))
2490                 cifs_dbg(FYI, "ppw - page not up to date\n");
2491
2492         /*
2493          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2494          *
2495          * A writepage() implementation always needs to do either this,
2496          * or re-dirty the page with "redirty_page_for_writepage()" in
2497          * the case of a failure.
2498          *
2499          * Just unlocking the page will cause the radix tree tag-bits
2500          * to fail to update with the state of the page correctly.
2501          */
2502         set_page_writeback(page);
2503 retry_write:
2504         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2505         if (is_retryable_error(rc)) {
2506                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2507                         goto retry_write;
2508                 redirty_page_for_writepage(wbc, page);
2509         } else if (rc != 0) {
2510                 SetPageError(page);
2511                 mapping_set_error(page->mapping, rc);
2512         } else {
2513                 SetPageUptodate(page);
2514         }
2515         end_page_writeback(page);
2516         put_page(page);
2517         free_xid(xid);
2518         return rc;
2519 }
2520
2521 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2522 {
2523         int rc = cifs_writepage_locked(page, wbc);
2524         unlock_page(page);
2525         return rc;
2526 }
2527
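/*
 * address_space write_end method.  If the page never became uptodate we
 * push the copied bytes straight to the server with cifs_write() rather
 * than dirtying a partially valid page; otherwise the page is simply
 * marked dirty for later writeback and i_size is updated if the write
 * extended the file.
 */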
2528 static int cifs_write_end(struct file *file, struct address_space *mapping,
2529                         loff_t pos, unsigned len, unsigned copied,
2530                         struct page *page, void *fsdata)
2531 {
2532         int rc;
2533         struct inode *inode = mapping->host;
2534         struct cifsFileInfo *cfile = file->private_data;
2535         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2536         __u32 pid;
2537
2538         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2539                 pid = cfile->pid;
2540         else
2541                 pid = current->tgid;
2542
2543         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2544                  page, pos, copied);
2545
2546         if (PageChecked(page)) {
2547                 if (copied == len)
2548                         SetPageUptodate(page);
2549                 ClearPageChecked(page);
2550         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2551                 SetPageUptodate(page);
2552
2553         if (!PageUptodate(page)) {
2554                 char *page_data;
2555                 unsigned offset = pos & (PAGE_SIZE - 1);
2556                 unsigned int xid;
2557
2558                 xid = get_xid();
2559                 /* this is probably better than directly calling
2560                    partialpage_write since in this function the file handle is
2561                    known, which we might as well leverage */
2562                 /* BB check if anything else is missing out of ppw,
2563                    such as updating the last write time */
2564                 page_data = kmap(page);
2565                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2566                 /* if (rc < 0) should we set writebehind rc? */
2567                 kunmap(page);
2568
2569                 free_xid(xid);
2570         } else {
2571                 rc = copied;
2572                 pos += copied;
2573                 set_page_dirty(page);
2574         }
2575
2576         if (rc > 0) {
2577                 spin_lock(&inode->i_lock);
2578                 if (pos > inode->i_size)
2579                         i_size_write(inode, pos);
2580                 spin_unlock(&inode->i_lock);
2581         }
2582
2583         unlock_page(page);
2584         put_page(page);
2585
2586         return rc;
2587 }
2588
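/*
 * fsync for strict cache mode: flush dirty pages, drop the page cache if
 * we no longer hold a read oplock (so subsequent reads go to the server)
 * and then ask the server to flush the file.  cifs_fsync() below is the
 * non-strict variant that skips the cache invalidation.
 */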
2589 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2590                       int datasync)
2591 {
2592         unsigned int xid;
2593         int rc = 0;
2594         struct cifs_tcon *tcon;
2595         struct TCP_Server_Info *server;
2596         struct cifsFileInfo *smbfile = file->private_data;
2597         struct inode *inode = file_inode(file);
2598         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2599
2600         rc = file_write_and_wait_range(file, start, end);
2601         if (rc) {
2602                 trace_cifs_fsync_err(inode->i_ino, rc);
2603                 return rc;
2604         }
2605
2606         xid = get_xid();
2607
2608         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2609                  file, datasync);
2610
2611         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2612                 rc = cifs_zap_mapping(inode);
2613                 if (rc) {
2614                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2615                         rc = 0; /* don't care about it in fsync */
2616                 }
2617         }
2618
2619         tcon = tlink_tcon(smbfile->tlink);
2620         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2621                 server = tcon->ses->server;
2622                 if (server->ops->flush)
2623                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2624                 else
2625                         rc = -ENOSYS;
2626         }
2627
2628         free_xid(xid);
2629         return rc;
2630 }
2631
2632 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2633 {
2634         unsigned int xid;
2635         int rc = 0;
2636         struct cifs_tcon *tcon;
2637         struct TCP_Server_Info *server;
2638         struct cifsFileInfo *smbfile = file->private_data;
2639         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2640
2641         rc = file_write_and_wait_range(file, start, end);
2642         if (rc) {
2643                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2644                 return rc;
2645         }
2646
2647         xid = get_xid();
2648
2649         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2650                  file, datasync);
2651
2652         tcon = tlink_tcon(smbfile->tlink);
2653         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2654                 server = tcon->ses->server;
2655                 if (server->ops->flush)
2656                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2657                 else
2658                         rc = -ENOSYS;
2659         }
2660
2661         free_xid(xid);
2662         return rc;
2663 }
2664
2665 /*
2666  * As the file closes, flush all cached write data for this inode, checking
2667  * for write-behind errors.
2668  */
2669 int cifs_flush(struct file *file, fl_owner_t id)
2670 {
2671         struct inode *inode = file_inode(file);
2672         int rc = 0;
2673
2674         if (file->f_mode & FMODE_WRITE)
2675                 rc = filemap_write_and_wait(inode->i_mapping);
2676
2677         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2678         if (rc)
2679                 trace_cifs_flush_err(inode->i_ino, rc);
2680         return rc;
2681 }
2682
2683 static int
2684 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2685 {
2686         int rc = 0;
2687         unsigned long i;
2688
2689         for (i = 0; i < num_pages; i++) {
2690                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2691                 if (!pages[i]) {
2692                         /*
2693                          * save number of pages we have already allocated and
2694                          * return with ENOMEM error
2695                          */
2696                         num_pages = i;
2697                         rc = -ENOMEM;
2698                         break;
2699                 }
2700         }
2701
2702         if (rc) {
2703                 for (i = 0; i < num_pages; i++)
2704                         put_page(pages[i]);
2705         }
2706         return rc;
2707 }
2708
2709 static inline
2710 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2711 {
2712         size_t num_pages;
2713         size_t clen;
2714
2715         clen = min_t(const size_t, len, wsize);
2716         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2717
2718         if (cur_len)
2719                 *cur_len = clen;
2720
2721         return num_pages;
2722 }
2723
2724 static void
2725 cifs_uncached_writedata_release(struct kref *refcount)
2726 {
2727         int i;
2728         struct cifs_writedata *wdata = container_of(refcount,
2729                                         struct cifs_writedata, refcount);
2730
2731         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2732         for (i = 0; i < wdata->nr_pages; i++)
2733                 put_page(wdata->pages[i]);
2734         cifs_writedata_release(refcount);
2735 }
2736
2737 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2738
2739 static void
2740 cifs_uncached_writev_complete(struct work_struct *work)
2741 {
2742         struct cifs_writedata *wdata = container_of(work,
2743                                         struct cifs_writedata, work);
2744         struct inode *inode = d_inode(wdata->cfile->dentry);
2745         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2746
2747         spin_lock(&inode->i_lock);
2748         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2749         if (cifsi->server_eof > inode->i_size)
2750                 i_size_write(inode, cifsi->server_eof);
2751         spin_unlock(&inode->i_lock);
2752
2753         complete(&wdata->done);
2754         collect_uncached_write_data(wdata->ctx);
2755         /* the below call can possibly free the last ref to aio ctx */
2756         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2757 }
2758
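/*
 * Copy up to *len bytes from the iterator into wdata's pages.  On return
 * *len holds the bytes actually copied and *num_pages the pages used;
 * a short copy (e.g. a faulting iovec) just truncates this write, while
 * copying nothing at all is reported as -EFAULT.
 */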
2759 static int
2760 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2761                       size_t *len, unsigned long *num_pages)
2762 {
2763         size_t save_len, copied, bytes, cur_len = *len;
2764         unsigned long i, nr_pages = *num_pages;
2765
2766         save_len = cur_len;
2767         for (i = 0; i < nr_pages; i++) {
2768                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2769                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2770                 cur_len -= copied;
2771                 /*
2772                  * If we didn't copy as much as we expected, then that
2773                  * may mean we trod into an unmapped area. Stop copying
2774                  * at that point. On the next pass through the big
2775                  * loop, we'll likely end up getting a zero-length
2776                  * write and bailing out of it.
2777                  */
2778                 if (copied < bytes)
2779                         break;
2780         }
2781         cur_len = save_len - cur_len;
2782         *len = cur_len;
2783
2784         /*
2785          * If we have no data to send, then that probably means that
2786          * the copy above failed altogether. That's most likely because
2787          * the address in the iovec was bogus. Return -EFAULT and let
2788          * the caller free anything we allocated and bail out.
2789          */
2790         if (!cur_len)
2791                 return -EFAULT;
2792
2793         /*
2794          * i + 1 now represents the number of pages we actually used in
2795          * the copy phase above.
2796          */
2797         *num_pages = i + 1;
2798         return 0;
2799 }
2800
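/*
 * Retry a failed uncached write.  The wdata is resent whole rather than
 * split, so we wait until the granted credits cover wdata->bytes before
 * reissuing the async write, reopening the file first if the handle was
 * invalidated.  On success the wdata is requeued on wdata_list.
 */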
2801 static int
2802 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2803         struct cifs_aio_ctx *ctx)
2804 {
2805         unsigned int wsize;
2806         struct cifs_credits credits;
2807         int rc;
2808         struct TCP_Server_Info *server =
2809                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2810
2811         do {
2812                 if (wdata->cfile->invalidHandle) {
2813                         rc = cifs_reopen_file(wdata->cfile, false);
2814                         if (rc == -EAGAIN)
2815                                 continue;
2816                         else if (rc)
2817                                 break;
2818                 }
2819
2821                 /*
2822                  * Wait for credits to resend this wdata.
2823                  * Note: we are attempting to resend the whole wdata not in
2824                  * segments
2825                  */
2826                 do {
2827                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2828                                                 &wsize, &credits);
2829                         if (rc)
2830                                 goto fail;
2831
2832                         if (wsize < wdata->bytes) {
2833                                 add_credits_and_wake_if(server, &credits, 0);
2834                                 msleep(1000);
2835                         }
2836                 } while (wsize < wdata->bytes);
2837                 wdata->credits = credits;
2838
2839                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2840
2841                 if (!rc) {
2842                         if (wdata->cfile->invalidHandle)
2843                                 rc = -EAGAIN;
2844                         else {
2845 #ifdef CONFIG_CIFS_SMB_DIRECT
2846                                 if (wdata->mr) {
2847                                         wdata->mr->need_invalidate = true;
2848                                         smbd_deregister_mr(wdata->mr);
2849                                         wdata->mr = NULL;
2850                                 }
2851 #endif
2852                                 rc = server->ops->async_writev(wdata,
2853                                         cifs_uncached_writedata_release);
2854                         }
2855                 }
2856
2857                 /* If the write was successfully sent, we are done */
2858                 if (!rc) {
2859                         list_add_tail(&wdata->list, wdata_list);
2860                         return 0;
2861                 }
2862
2863                 /* Roll back credits and retry if needed */
2864                 add_credits_and_wake_if(server, &wdata->credits, 0);
2865         } while (rc == -EAGAIN);
2866
2867 fail:
2868         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2869         return rc;
2870 }
2871
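/*
 * Split an uncached or direct write into wsize-sized requests and issue them
 * asynchronously. For direct I/O the user pages are pinned in place;
 * otherwise pages are allocated and the data is copied out of the iterator.
 * Each resulting wdata is queued on wdata_list for
 * collect_uncached_write_data().
 *
 * Illustrative numbers only: with len == 200000 and a granted wsize of
 * 65536, this loop issues four requests of 65536, 65536, 65536 and 3392
 * bytes.
 */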
2872 static int
2873 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2874                      struct cifsFileInfo *open_file,
2875                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2876                      struct cifs_aio_ctx *ctx)
2877 {
2878         int rc = 0;
2879         size_t cur_len;
2880         unsigned long nr_pages, num_pages, i;
2881         struct cifs_writedata *wdata;
2882         struct iov_iter saved_from = *from;
2883         loff_t saved_offset = offset;
2884         pid_t pid;
2885         struct TCP_Server_Info *server;
2886         struct page **pagevec;
2887         size_t start;
2888         unsigned int xid;
2889
2890         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2891                 pid = open_file->pid;
2892         else
2893                 pid = current->tgid;
2894
2895         server = tlink_tcon(open_file->tlink)->ses->server;
2896         xid = get_xid();
2897
2898         do {
2899                 unsigned int wsize;
2900                 struct cifs_credits credits_on_stack;
2901                 struct cifs_credits *credits = &credits_on_stack;
2902
2903                 if (open_file->invalidHandle) {
2904                         rc = cifs_reopen_file(open_file, false);
2905                         if (rc == -EAGAIN)
2906                                 continue;
2907                         else if (rc)
2908                                 break;
2909                 }
2910
2911                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2912                                                    &wsize, credits);
2913                 if (rc)
2914                         break;
2915
2916                 cur_len = min_t(const size_t, len, wsize);
2917
2918                 if (ctx->direct_io) {
2919                         ssize_t result;
2920
2921                         result = iov_iter_get_pages_alloc(
2922                                 from, &pagevec, cur_len, &start);
2923                         if (result < 0) {
2924                                 cifs_dbg(VFS,
2925                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2926                                         result, iov_iter_type(from),
2927                                         from->iov_offset, from->count);
2930                                 dump_stack();
2931
2932                                 rc = result;
2933                                 add_credits_and_wake_if(server, credits, 0);
2934                                 break;
2935                         }
2936                         cur_len = (size_t)result;
2937                         iov_iter_advance(from, cur_len);
2938
2939                         nr_pages =
2940                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2941
2942                         wdata = cifs_writedata_direct_alloc(pagevec,
2943                                              cifs_uncached_writev_complete);
2944                         if (!wdata) {
2945                                 rc = -ENOMEM;
2946                                 add_credits_and_wake_if(server, credits, 0);
2947                                 break;
2948                         }
2949
2951                         wdata->page_offset = start;
2952                         wdata->tailsz =
2953                                 nr_pages > 1 ?
2954                                         cur_len - (PAGE_SIZE - start) -
2955                                         (nr_pages - 2) * PAGE_SIZE :
2956                                         cur_len;
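                        /*
                         * Worked example (illustrative numbers only, assuming
                         * PAGE_SIZE == 4096): with start == 100 and
                         * cur_len == 8192 we got nr_pages == 3 above; page 0
                         * holds bytes [100..4095], page 1 is full, and page 2
                         * holds the remaining 100 bytes, so tailsz == 100.
                         */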
2957                 } else {
2958                         nr_pages = get_numpages(wsize, len, &cur_len);
2959                         wdata = cifs_writedata_alloc(nr_pages,
2960                                              cifs_uncached_writev_complete);
2961                         if (!wdata) {
2962                                 rc = -ENOMEM;
2963                                 add_credits_and_wake_if(server, credits, 0);
2964                                 break;
2965                         }
2966
2967                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2968                         if (rc) {
2969                                 kvfree(wdata->pages);
2970                                 kfree(wdata);
2971                                 add_credits_and_wake_if(server, credits, 0);
2972                                 break;
2973                         }
2974
2975                         num_pages = nr_pages;
2976                         rc = wdata_fill_from_iovec(
2977                                 wdata, from, &cur_len, &num_pages);
2978                         if (rc) {
2979                                 for (i = 0; i < nr_pages; i++)
2980                                         put_page(wdata->pages[i]);
2981                                 kvfree(wdata->pages);
2982                                 kfree(wdata);
2983                                 add_credits_and_wake_if(server, credits, 0);
2984                                 break;
2985                         }
2986
2987                         /*
2988                          * Bring nr_pages down to the number of pages we
2989                          * actually used, and free any pages that we didn't use.
2990                          */
2991                         for ( ; nr_pages > num_pages; nr_pages--)
2992                                 put_page(wdata->pages[nr_pages - 1]);
2993
2994                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2995                 }
2996
2997                 wdata->sync_mode = WB_SYNC_ALL;
2998                 wdata->nr_pages = nr_pages;
2999                 wdata->offset = (__u64)offset;
3000                 wdata->cfile = cifsFileInfo_get(open_file);
3001                 wdata->pid = pid;
3002                 wdata->bytes = cur_len;
3003                 wdata->pagesz = PAGE_SIZE;
3004                 wdata->credits = credits_on_stack;
3005                 wdata->ctx = ctx;
3006                 kref_get(&ctx->refcount);
3007
3008                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3009
3010                 if (!rc) {
3011                         if (wdata->cfile->invalidHandle)
3012                                 rc = -EAGAIN;
3013                         else
3014                                 rc = server->ops->async_writev(wdata,
3015                                         cifs_uncached_writedata_release);
3016                 }
3017
3018                 if (rc) {
3019                         add_credits_and_wake_if(server, &wdata->credits, 0);
3020                         kref_put(&wdata->refcount,
3021                                  cifs_uncached_writedata_release);
3022                         if (rc == -EAGAIN) {
3023                                 *from = saved_from;
3024                                 iov_iter_advance(from, offset - saved_offset);
3025                                 continue;
3026                         }
3027                         break;
3028                 }
3029
3030                 list_add_tail(&wdata->list, wdata_list);
3031                 offset += cur_len;
3032                 len -= cur_len;
3033         } while (len > 0);
3034
3035         free_xid(xid);
3036         return rc;
3037 }
3038
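/*
 * Collect the results of the writes queued by cifs_write_from_iter(). This
 * runs from each write's completion work; try_wait_for_completion() makes it
 * return early while replies are still outstanding, so only the final
 * completion walks the whole list, resends any -EAGAIN failures and then
 * signals the aio ctx.
 */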
3039 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3040 {
3041         struct cifs_writedata *wdata, *tmp;
3042         struct cifs_tcon *tcon;
3043         struct cifs_sb_info *cifs_sb;
3044         struct dentry *dentry = ctx->cfile->dentry;
3045         int rc;
3046
3047         tcon = tlink_tcon(ctx->cfile->tlink);
3048         cifs_sb = CIFS_SB(dentry->d_sb);
3049
3050         mutex_lock(&ctx->aio_mutex);
3051
3052         if (list_empty(&ctx->list)) {
3053                 mutex_unlock(&ctx->aio_mutex);
3054                 return;
3055         }
3056
3057         rc = ctx->rc;
3058         /*
3059          * Wait for and collect replies for any successful sends in order of
3060          * increasing offset. Once an error is hit, return without waiting
3061          * for any more replies.
3062          */
3063 restart_loop:
3064         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3065                 if (!rc) {
3066                         if (!try_wait_for_completion(&wdata->done)) {
3067                                 mutex_unlock(&ctx->aio_mutex);
3068                                 return;
3069                         }
3070
3071                         if (wdata->result)
3072                                 rc = wdata->result;
3073                         else
3074                                 ctx->total_len += wdata->bytes;
3075
3076                         /* resend call if it's a retryable error */
3077                         if (rc == -EAGAIN) {
3078                                 struct list_head tmp_list;
3079                                 struct iov_iter tmp_from = ctx->iter;
3080
3081                                 INIT_LIST_HEAD(&tmp_list);
3082                                 list_del_init(&wdata->list);
3083
3084                                 if (ctx->direct_io)
3085                                         rc = cifs_resend_wdata(
3086                                                 wdata, &tmp_list, ctx);
3087                                 else {
3088                                         iov_iter_advance(&tmp_from,
3089                                                  wdata->offset - ctx->pos);
3090
3091                                         rc = cifs_write_from_iter(wdata->offset,
3092                                                 wdata->bytes, &tmp_from,
3093                                                 ctx->cfile, cifs_sb, &tmp_list,
3094                                                 ctx);
3095
3096                                         kref_put(&wdata->refcount,
3097                                                 cifs_uncached_writedata_release);
3098                                 }
3099
3100                                 list_splice(&tmp_list, &ctx->list);
3101                                 goto restart_loop;
3102                         }
3103                 }
3104                 list_del_init(&wdata->list);
3105                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3106         }
3107
3108         cifs_stats_bytes_written(tcon, ctx->total_len);
3109         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3110
3111         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3112
3113         mutex_unlock(&ctx->aio_mutex);
3114
3115         if (ctx->iocb && ctx->iocb->ki_complete)
3116                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3117         else
3118                 complete(&ctx->done);
3119 }
3120
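/*
 * Common implementation behind cifs_user_writev() (data is copied into
 * ctx-owned pages) and cifs_direct_writev() (user pages are pinned and
 * written from directly). For a synchronous kiocb the result is awaited
 * here; for aio, -EIOCBQUEUED is returned and ki_complete fires from the
 * collection path.
 */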
3121 static ssize_t __cifs_writev(
3122         struct kiocb *iocb, struct iov_iter *from, bool direct)
3123 {
3124         struct file *file = iocb->ki_filp;
3125         ssize_t total_written = 0;
3126         struct cifsFileInfo *cfile;
3127         struct cifs_tcon *tcon;
3128         struct cifs_sb_info *cifs_sb;
3129         struct cifs_aio_ctx *ctx;
3130         struct iov_iter saved_from = *from;
3131         size_t len = iov_iter_count(from);
3132         int rc;
3133
3134         /*
3135          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3136          * In this case, fall back to the non-direct write function.
3137          * This could be improved by getting pages directly in ITER_KVEC.
3138          */
3139         if (direct && iov_iter_is_kvec(from)) {
3140                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3141                 direct = false;
3142         }
3143
3144         rc = generic_write_checks(iocb, from);
3145         if (rc <= 0)
3146                 return rc;
3147
3148         cifs_sb = CIFS_FILE_SB(file);
3149         cfile = file->private_data;
3150         tcon = tlink_tcon(cfile->tlink);
3151
3152         if (!tcon->ses->server->ops->async_writev)
3153                 return -ENOSYS;
3154
3155         ctx = cifs_aio_ctx_alloc();
3156         if (!ctx)
3157                 return -ENOMEM;
3158
3159         ctx->cfile = cifsFileInfo_get(cfile);
3160
3161         if (!is_sync_kiocb(iocb))
3162                 ctx->iocb = iocb;
3163
3164         ctx->pos = iocb->ki_pos;
3165
3166         if (direct) {
3167                 ctx->direct_io = true;
3168                 ctx->iter = *from;
3169                 ctx->len = len;
3170         } else {
3171                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3172                 if (rc) {
3173                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3174                         return rc;
3175                 }
3176         }
3177
3178         /* grab a lock here because write response handlers can access ctx */
3179         mutex_lock(&ctx->aio_mutex);
3180
3181         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3182                                   cfile, cifs_sb, &ctx->list, ctx);
3183
3184         /*
3185          * If at least one write was successfully sent, then discard any rc
3186          * value from the later writes. If the remaining writes succeed, we
3187          * end up returning whatever was written; if they fail, we get a new
3188          * rc value from the collection path.
3189          */
3190         if (!list_empty(&ctx->list))
3191                 rc = 0;
3192
3193         mutex_unlock(&ctx->aio_mutex);
3194
3195         if (rc) {
3196                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3197                 return rc;
3198         }
3199
3200         if (!is_sync_kiocb(iocb)) {
3201                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3202                 return -EIOCBQUEUED;
3203         }
3204
3205         rc = wait_for_completion_killable(&ctx->done);
3206         if (rc) {
3207                 mutex_lock(&ctx->aio_mutex);
3208                 ctx->rc = rc = -EINTR;
3209                 total_written = ctx->total_len;
3210                 mutex_unlock(&ctx->aio_mutex);
3211         } else {
3212                 rc = ctx->rc;
3213                 total_written = ctx->total_len;
3214         }
3215
3216         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3217
3218         if (unlikely(!total_written))
3219                 return rc;
3220
3221         iocb->ki_pos += total_written;
3222         return total_written;
3223 }
3224
3225 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3226 {
3227         return __cifs_writev(iocb, from, true);
3228 }
3229
3230 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3231 {
3232         return __cifs_writev(iocb, from, false);
3233 }
3234
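/*
 * Writeback-cached write path, used when we hold an oplock/lease that allows
 * caching writes but mandatory byte-range locks must still be honoured: the
 * write is refused with -EACCES if it conflicts with an exclusive brlock.
 */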
3235 static ssize_t
3236 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3237 {
3238         struct file *file = iocb->ki_filp;
3239         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3240         struct inode *inode = file->f_mapping->host;
3241         struct cifsInodeInfo *cinode = CIFS_I(inode);
3242         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3243         ssize_t rc;
3244
3245         inode_lock(inode);
3246         /*
3247          * We need to hold the sem to be sure nobody modifies lock list
3248          * with a brlock that prevents writing.
3249          */
3250         down_read(&cinode->lock_sem);
3251
3252         rc = generic_write_checks(iocb, from);
3253         if (rc <= 0)
3254                 goto out;
3255
3256         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3257                                      server->vals->exclusive_lock_type, 0,
3258                                      NULL, CIFS_WRITE_OP))
3259                 rc = __generic_file_write_iter(iocb, from);
3260         else
3261                 rc = -EACCES;
3262 out:
3263         up_read(&cinode->lock_sem);
3264         inode_unlock(inode);
3265
3266         if (rc > 0)
3267                 rc = generic_write_sync(iocb, rc);
3268         return rc;
3269 }
3270
3271 ssize_t
3272 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3273 {
3274         struct inode *inode = file_inode(iocb->ki_filp);
3275         struct cifsInodeInfo *cinode = CIFS_I(inode);
3276         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3277         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3278                                                 iocb->ki_filp->private_data;
3279         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3280         ssize_t written;
3281
3282         written = cifs_get_writer(cinode);
3283         if (written)
3284                 return written;
3285
3286         if (CIFS_CACHE_WRITE(cinode)) {
3287                 if (cap_unix(tcon->ses) &&
3288                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3289                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3290                         written = generic_file_write_iter(iocb, from);
3291                         goto out;
3292                 }
3293                 written = cifs_writev(iocb, from);
3294                 goto out;
3295         }
3296         /*
3297          * For non-oplocked files in strict cache mode we need to write the data
3298          * to the server exactly from pos to pos+len-1 rather than flush all
3299          * affected pages, because flushing may cause an error with mandatory
3300          * locks on these pages but not on the region from pos to pos+len-1.
3301          */
3302         written = cifs_user_writev(iocb, from);
3303         if (CIFS_CACHE_READ(cinode)) {
3304                 /*
3305                  * We have read level caching and we have just sent a write
3306                  * request to the server thus making data in the cache stale.
3307                  * Zap the cache and set oplock/lease level to NONE to avoid
3308                  * reading stale data from the cache. All subsequent read
3309                  * operations will read new data from the server.
3310                  */
3311                 cifs_zap_mapping(inode);
3312                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3313                          inode);
3314                 cinode->oplock = 0;
3315         }
3316 out:
3317         cifs_put_writer(cinode);
3318         return written;
3319 }
3320
3321 static struct cifs_readdata *
3322 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3323 {
3324         struct cifs_readdata *rdata;
3325
3326         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3327         if (rdata != NULL) {
3328                 rdata->pages = pages;
3329                 kref_init(&rdata->refcount);
3330                 INIT_LIST_HEAD(&rdata->list);
3331                 init_completion(&rdata->done);
3332                 INIT_WORK(&rdata->work, complete);
3333         }
3334
3335         return rdata;
3336 }
3337
3338 static struct cifs_readdata *
3339 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3340 {
3341         struct page **pages =
3342                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3343         struct cifs_readdata *ret = NULL;
3344
3345         if (pages) {
3346                 ret = cifs_readdata_direct_alloc(pages, complete);
3347                 if (!ret)
3348                         kfree(pages);
3349         }
3350
3351         return ret;
3352 }
3353
3354 void
3355 cifs_readdata_release(struct kref *refcount)
3356 {
3357         struct cifs_readdata *rdata = container_of(refcount,
3358                                         struct cifs_readdata, refcount);
3359 #ifdef CONFIG_CIFS_SMB_DIRECT
3360         if (rdata->mr) {
3361                 smbd_deregister_mr(rdata->mr);
3362                 rdata->mr = NULL;
3363         }
3364 #endif
3365         if (rdata->cfile)
3366                 cifsFileInfo_put(rdata->cfile);
3367
3368         kvfree(rdata->pages);
3369         kfree(rdata);
3370 }
3371
3372 static int
3373 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3374 {
3375         int rc = 0;
3376         struct page *page;
3377         unsigned int i;
3378
3379         for (i = 0; i < nr_pages; i++) {
3380                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3381                 if (!page) {
3382                         rc = -ENOMEM;
3383                         break;
3384                 }
3385                 rdata->pages[i] = page;
3386         }
3387
3388         if (rc) {
3389                 unsigned int nr_page_failed = i;
3390
3391                 for (i = 0; i < nr_page_failed; i++) {
3392                         put_page(rdata->pages[i]);
3393                         rdata->pages[i] = NULL;
3394                 }
3395         }
3396         return rc;
3397 }
3398
3399 static void
3400 cifs_uncached_readdata_release(struct kref *refcount)
3401 {
3402         struct cifs_readdata *rdata = container_of(refcount,
3403                                         struct cifs_readdata, refcount);
3404         unsigned int i;
3405
3406         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3407         for (i = 0; i < rdata->nr_pages; i++) {
3408                 put_page(rdata->pages[i]);
3409         }
3410         cifs_readdata_release(refcount);
3411 }
3412
3413 /**
3414  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3415  * @rdata:      the readdata response with list of pages holding data
3416  * @iter:       destination for our data
3417  *
3418  * This function copies data from a list of pages in a readdata response into
3419  * an array of iovecs. It will first calculate where the data should go
3420  * based on the info in the readdata and then copy the data into that spot.
3421  */
3422 static int
3423 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3424 {
3425         size_t remaining = rdata->got_bytes;
3426         unsigned int i;
3427
3428         for (i = 0; i < rdata->nr_pages; i++) {
3429                 struct page *page = rdata->pages[i];
3430                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3431                 size_t written;
3432
3433                 if (unlikely(iov_iter_is_pipe(iter))) {
3434                         void *addr = kmap_atomic(page);
3435
3436                         written = copy_to_iter(addr, copy, iter);
3437                         kunmap_atomic(addr);
3438                 } else
3439                         written = copy_page_to_iter(page, 0, copy, iter);
3440                 remaining -= written;
3441                 if (written < copy && iov_iter_count(iter) > 0)
3442                         break;
3443         }
3444         return remaining ? -EFAULT : 0;
3445 }
3446
3447 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3448
3449 static void
3450 cifs_uncached_readv_complete(struct work_struct *work)
3451 {
3452         struct cifs_readdata *rdata = container_of(work,
3453                                                 struct cifs_readdata, work);
3454
3455         complete(&rdata->done);
3456         collect_uncached_read_data(rdata->ctx);
3457         /* the below call can possibly free the last ref to aio ctx */
3458         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3459 }
3460
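/*
 * Fill rdata->pages from one of three sources: a supplied iterator (used
 * when the response has already been received into another buffer, e.g.
 * after decryption), an SMB Direct memory registration (the data has already
 * landed via RDMA), or the plain socket. tailsz is trimmed to however much
 * of the last page is used.
 */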
3461 static int
3462 uncached_fill_pages(struct TCP_Server_Info *server,
3463                     struct cifs_readdata *rdata, struct iov_iter *iter,
3464                     unsigned int len)
3465 {
3466         int result = 0;
3467         unsigned int i;
3468         unsigned int nr_pages = rdata->nr_pages;
3469         unsigned int page_offset = rdata->page_offset;
3470
3471         rdata->got_bytes = 0;
3472         rdata->tailsz = PAGE_SIZE;
3473         for (i = 0; i < nr_pages; i++) {
3474                 struct page *page = rdata->pages[i];
3475                 size_t n;
3476                 unsigned int segment_size = rdata->pagesz;
3477
3478                 if (i == 0)
3479                         segment_size -= page_offset;
3480                 else
3481                         page_offset = 0;
3482
3484                 if (len == 0) {
3485                         /* no need to hold page hostage */
3486                         rdata->pages[i] = NULL;
3487                         rdata->nr_pages--;
3488                         put_page(page);
3489                         continue;
3490                 }
3491
3492                 n = len;
3493                 if (len >= segment_size)
3494                         /* enough data to fill the page */
3495                         n = segment_size;
3496                 else
3497                         rdata->tailsz = len;
3498                 len -= n;
3499
3500                 if (iter)
3501                         result = copy_page_from_iter(
3502                                         page, page_offset, n, iter);
3503 #ifdef CONFIG_CIFS_SMB_DIRECT
3504                 else if (rdata->mr)
3505                         result = n;
3506 #endif
3507                 else
3508                         result = cifs_read_page_from_socket(
3509                                         server, page, page_offset, n);
3510                 if (result < 0)
3511                         break;
3512
3513                 rdata->got_bytes += result;
3514         }
3515
3516         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3517                                                 rdata->got_bytes : result;
3518 }
3519
3520 static int
3521 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3522                               struct cifs_readdata *rdata, unsigned int len)
3523 {
3524         return uncached_fill_pages(server, rdata, NULL, len);
3525 }
3526
3527 static int
3528 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3529                               struct cifs_readdata *rdata,
3530                               struct iov_iter *iter)
3531 {
3532         return uncached_fill_pages(server, rdata, iter, iter->count);
3533 }
3534
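/*
 * Read-side counterpart of cifs_resend_wdata(): resend the whole rdata as a
 * single unit once the server grants enough credits to cover rdata->bytes.
 */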
3535 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3536                         struct list_head *rdata_list,
3537                         struct cifs_aio_ctx *ctx)
3538 {
3539         unsigned int rsize;
3540         struct cifs_credits credits;
3541         int rc;
3542         struct TCP_Server_Info *server =
3543                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3544
3545         do {
3546                 if (rdata->cfile->invalidHandle) {
3547                         rc = cifs_reopen_file(rdata->cfile, true);
3548                         if (rc == -EAGAIN)
3549                                 continue;
3550                         else if (rc)
3551                                 break;
3552                 }
3553
3554                 /*
3555                  * Wait for credits to resend this rdata.
3556                  * Note: we attempt to resend the whole rdata in one piece
3557                  * rather than in segments.
3558                  */
3559                 do {
3560                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3561                                                 &rsize, &credits);
3562
3563                         if (rc)
3564                                 goto fail;
3565
3566                         if (rsize < rdata->bytes) {
3567                                 add_credits_and_wake_if(server, &credits, 0);
3568                                 msleep(1000);
3569                         }
3570                 } while (rsize < rdata->bytes);
3571                 rdata->credits = credits;
3572
3573                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3574                 if (!rc) {
3575                         if (rdata->cfile->invalidHandle)
3576                                 rc = -EAGAIN;
3577                         else {
3578 #ifdef CONFIG_CIFS_SMB_DIRECT
3579                                 if (rdata->mr) {
3580                                         rdata->mr->need_invalidate = true;
3581                                         smbd_deregister_mr(rdata->mr);
3582                                         rdata->mr = NULL;
3583                                 }
3584 #endif
3585                                 rc = server->ops->async_readv(rdata);
3586                         }
3587                 }
3588
3589                 /* If the read was successfully sent, we are done */
3590                 if (!rc) {
3591                         /* Add to aio pending list */
3592                         list_add_tail(&rdata->list, rdata_list);
3593                         return 0;
3594                 }
3595
3596                 /* Roll back credits and retry if needed */
3597                 add_credits_and_wake_if(server, &rdata->credits, 0);
3598         } while (rc == -EAGAIN);
3599
3600 fail:
3601         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3602         return rc;
3603 }
3604
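/*
 * Split an uncached or direct read into rsize-sized requests and issue them
 * asynchronously, queueing each rdata on rdata_list for
 * collect_uncached_read_data(). Direct I/O pins the user pages; the copy
 * path allocates pages to receive the response data.
 */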
3605 static int
3606 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3607                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3608                      struct cifs_aio_ctx *ctx)
3609 {
3610         struct cifs_readdata *rdata;
3611         unsigned int npages, rsize;
3612         struct cifs_credits credits_on_stack;
3613         struct cifs_credits *credits = &credits_on_stack;
3614         size_t cur_len;
3615         int rc;
3616         pid_t pid;
3617         struct TCP_Server_Info *server;
3618         struct page **pagevec;
3619         size_t start;
3620         struct iov_iter direct_iov = ctx->iter;
3621
3622         server = tlink_tcon(open_file->tlink)->ses->server;
3623
3624         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3625                 pid = open_file->pid;
3626         else
3627                 pid = current->tgid;
3628
3629         if (ctx->direct_io)
3630                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3631
3632         do {
3633                 if (open_file->invalidHandle) {
3634                         rc = cifs_reopen_file(open_file, true);
3635                         if (rc == -EAGAIN)
3636                                 continue;
3637                         else if (rc)
3638                                 break;
3639                 }
3640
3641                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3642                                                    &rsize, credits);
3643                 if (rc)
3644                         break;
3645
3646                 cur_len = min_t(const size_t, len, rsize);
3647
3648                 if (ctx->direct_io) {
3649                         ssize_t result;
3650
3651                         result = iov_iter_get_pages_alloc(
3652                                         &direct_iov, &pagevec,
3653                                         cur_len, &start);
3654                         if (result < 0) {
3655                                 cifs_dbg(VFS,
3656                                         "couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3657                                         result, iov_iter_type(&direct_iov),
3658                                         direct_iov.iov_offset,
3659                                         direct_iov.count);
3662                                 dump_stack();
3663
3664                                 rc = result;
3665                                 add_credits_and_wake_if(server, credits, 0);
3666                                 break;
3667                         }
3668                         cur_len = (size_t)result;
3669                         iov_iter_advance(&direct_iov, cur_len);
3670
3671                         rdata = cifs_readdata_direct_alloc(
3672                                         pagevec, cifs_uncached_readv_complete);
3673                         if (!rdata) {
3674                                 add_credits_and_wake_if(server, credits, 0);
3675                                 rc = -ENOMEM;
3676                                 break;
3677                         }
3678
3679                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3680                         rdata->page_offset = start;
3681                         rdata->tailsz = npages > 1 ?
3682                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3683                                 cur_len;
3684
3685                 } else {
3686
3687                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3688                         /* allocate a readdata struct */
3689                         rdata = cifs_readdata_alloc(npages,
3690                                             cifs_uncached_readv_complete);
3691                         if (!rdata) {
3692                                 add_credits_and_wake_if(server, credits, 0);
3693                                 rc = -ENOMEM;
3694                                 break;
3695                         }
3696
3697                         rc = cifs_read_allocate_pages(rdata, npages);
3698                         if (rc) {
3699                                 kvfree(rdata->pages);
3700                                 kfree(rdata);
3701                                 add_credits_and_wake_if(server, credits, 0);
3702                                 break;
3703                         }
3704
3705                         rdata->tailsz = PAGE_SIZE;
3706                 }
3707
3708                 rdata->cfile = cifsFileInfo_get(open_file);
3709                 rdata->nr_pages = npages;
3710                 rdata->offset = offset;
3711                 rdata->bytes = cur_len;
3712                 rdata->pid = pid;
3713                 rdata->pagesz = PAGE_SIZE;
3714                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3715                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3716                 rdata->credits = credits_on_stack;
3717                 rdata->ctx = ctx;
3718                 kref_get(&ctx->refcount);
3719
3720                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3721
3722                 if (!rc) {
3723                         if (rdata->cfile->invalidHandle)
3724                                 rc = -EAGAIN;
3725                         else
3726                                 rc = server->ops->async_readv(rdata);
3727                 }
3728
3729                 if (rc) {
3730                         add_credits_and_wake_if(server, &rdata->credits, 0);
3731                         kref_put(&rdata->refcount,
3732                                 cifs_uncached_readdata_release);
3733                         if (rc == -EAGAIN) {
3734                                 iov_iter_revert(&direct_iov, cur_len);
3735                                 continue;
3736                         }
3737                         break;
3738                 }
3739
3740                 list_add_tail(&rdata->list, rdata_list);
3741                 offset += cur_len;
3742                 len -= cur_len;
3743         } while (len > 0);
3744
3745         return rc;
3746 }
3747
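/*
 * Collect the results of the reads queued by cifs_send_async_read(). Like
 * the write-side collector this runs from each read's completion work; short
 * reads that hit a reconnect are resent for the missing tail before the aio
 * ctx is completed.
 */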
3748 static void
3749 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3750 {
3751         struct cifs_readdata *rdata, *tmp;
3752         struct iov_iter *to = &ctx->iter;
3753         struct cifs_sb_info *cifs_sb;
3754         int rc;
3755
3756         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3757
3758         mutex_lock(&ctx->aio_mutex);
3759
3760         if (list_empty(&ctx->list)) {
3761                 mutex_unlock(&ctx->aio_mutex);
3762                 return;
3763         }
3764
3765         rc = ctx->rc;
3766         /* the loop below should proceed in the order of increasing offsets */
3767 again:
3768         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3769                 if (!rc) {
3770                         if (!try_wait_for_completion(&rdata->done)) {
3771                                 mutex_unlock(&ctx->aio_mutex);
3772                                 return;
3773                         }
3774
3775                         if (rdata->result == -EAGAIN) {
3776                                 /* resend call if it's a retryable error */
3777                                 struct list_head tmp_list;
3778                                 unsigned int got_bytes = rdata->got_bytes;
3779
3780                                 list_del_init(&rdata->list);
3781                                 INIT_LIST_HEAD(&tmp_list);
3782
3783                                 /*
3784                                  * We got part of the data before the
3785                                  * reconnect happened -- copy what we have
3786                                  * into the buffer and continue reading.
3787                                  */
3788                                 if (got_bytes && got_bytes < rdata->bytes) {
3789                                         rc = 0;
3790                                         if (!ctx->direct_io)
3791                                                 rc = cifs_readdata_to_iov(rdata, to);
3792                                         if (rc) {
3793                                                 kref_put(&rdata->refcount,
3794                                                         cifs_uncached_readdata_release);
3795                                                 continue;
3796                                         }
3797                                 }
3798
3799                                 if (ctx->direct_io) {
3800                                         /*
3801                                          * Re-use rdata as this is a
3802                                          * direct I/O
3803                                          */
3804                                         rc = cifs_resend_rdata(
3805                                                 rdata,
3806                                                 &tmp_list, ctx);
3807                                 } else {
3808                                         rc = cifs_send_async_read(
3809                                                 rdata->offset + got_bytes,
3810                                                 rdata->bytes - got_bytes,
3811                                                 rdata->cfile, cifs_sb,
3812                                                 &tmp_list, ctx);
3813
3814                                         kref_put(&rdata->refcount,
3815                                                 cifs_uncached_readdata_release);
3816                                 }
3817
3818                                 list_splice(&tmp_list, &ctx->list);
3819
3820                                 goto again;
3821                         } else if (rdata->result)
3822                                 rc = rdata->result;
3823                         else if (!ctx->direct_io)
3824                                 rc = cifs_readdata_to_iov(rdata, to);
3825
3826                         /* if there was a short read -- discard anything left */
3827                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3828                                 rc = -ENODATA;
3829
3830                         ctx->total_len += rdata->got_bytes;
3831                 }
3832                 list_del_init(&rdata->list);
3833                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3834         }
3835
3836         if (!ctx->direct_io)
3837                 ctx->total_len = ctx->len - iov_iter_count(to);
3838
3839         /* mask nodata case */
3840         if (rc == -ENODATA)
3841                 rc = 0;
3842
3843         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3844
3845         mutex_unlock(&ctx->aio_mutex);
3846
3847         if (ctx->iocb && ctx->iocb->ki_complete)
3848                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3849         else
3850                 complete(&ctx->done);
3851 }
3852
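/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv(),
 * mirroring __cifs_writev(): issue the reads, then either wait (sync kiocb)
 * or return -EIOCBQUEUED and let the collection path call ki_complete.
 */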
3853 static ssize_t __cifs_readv(
3854         struct kiocb *iocb, struct iov_iter *to, bool direct)
3855 {
3856         size_t len;
3857         struct file *file = iocb->ki_filp;
3858         struct cifs_sb_info *cifs_sb;
3859         struct cifsFileInfo *cfile;
3860         struct cifs_tcon *tcon;
3861         ssize_t rc, total_read = 0;
3862         loff_t offset = iocb->ki_pos;
3863         struct cifs_aio_ctx *ctx;
3864
3865         /*
3866          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3867          * so fall back to the data-copy read path.
3868          * This could be improved by getting pages directly in ITER_KVEC.
3869          */
3870         if (direct && iov_iter_is_kvec(to)) {
3871                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3872                 direct = false;
3873         }
3874
3875         len = iov_iter_count(to);
3876         if (!len)
3877                 return 0;
3878
3879         cifs_sb = CIFS_FILE_SB(file);
3880         cfile = file->private_data;
3881         tcon = tlink_tcon(cfile->tlink);
3882
3883         if (!tcon->ses->server->ops->async_readv)
3884                 return -ENOSYS;
3885
3886         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3887                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3888
3889         ctx = cifs_aio_ctx_alloc();
3890         if (!ctx)
3891                 return -ENOMEM;
3892
3893         ctx->cfile = cifsFileInfo_get(cfile);
3894
3895         if (!is_sync_kiocb(iocb))
3896                 ctx->iocb = iocb;
3897
3898         if (iter_is_iovec(to))
3899                 ctx->should_dirty = true;
3900
3901         if (direct) {
3902                 ctx->pos = offset;
3903                 ctx->direct_io = true;
3904                 ctx->iter = *to;
3905                 ctx->len = len;
3906         } else {
3907                 rc = setup_aio_ctx_iter(ctx, to, READ);
3908                 if (rc) {
3909                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3910                         return rc;
3911                 }
3912                 len = ctx->len;
3913         }
3914
3915         /* grab a lock here because read response handlers can access ctx */
3916         mutex_lock(&ctx->aio_mutex);
3917
3918         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3919
3920         /* if at least one read request was successfully sent, then reset rc */
3921         if (!list_empty(&ctx->list))
3922                 rc = 0;
3923
3924         mutex_unlock(&ctx->aio_mutex);
3925
3926         if (rc) {
3927                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3928                 return rc;
3929         }
3930
3931         if (!is_sync_kiocb(iocb)) {
3932                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3933                 return -EIOCBQUEUED;
3934         }
3935
3936         rc = wait_for_completion_killable(&ctx->done);
3937         if (rc) {
3938                 mutex_lock(&ctx->aio_mutex);
3939                 ctx->rc = rc = -EINTR;
3940                 total_read = ctx->total_len;
3941                 mutex_unlock(&ctx->aio_mutex);
3942         } else {
3943                 rc = ctx->rc;
3944                 total_read = ctx->total_len;
3945         }
3946
3947         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3948
3949         if (total_read) {
3950                 iocb->ki_pos += total_read;
3951                 return total_read;
3952         }
3953         return rc;
3954 }
3955
3956 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3957 {
3958         return __cifs_readv(iocb, to, true);
3959 }
3960
3961 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3962 {
3963         return __cifs_readv(iocb, to, false);
3964 }
3965
3966 ssize_t
3967 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3968 {
3969         struct inode *inode = file_inode(iocb->ki_filp);
3970         struct cifsInodeInfo *cinode = CIFS_I(inode);
3971         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3972         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3973                                                 iocb->ki_filp->private_data;
3974         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3975         int rc = -EACCES;
3976
3977         /*
3978          * In strict cache mode we need to read from the server all the time
3979          * if we don't have a level II oplock, because the server can delay
3980          * the mtime change and so we can't decide whether to invalidate the
3981          * inode. We can also fail with page reading if there are mandatory
3982          * locks on pages affected by this read but not on the region from
3983          * pos to pos+len-1.
3984          */
3985         if (!CIFS_CACHE_READ(cinode))
3986                 return cifs_user_readv(iocb, to);
3987
3988         if (cap_unix(tcon->ses) &&
3989             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3990             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3991                 return generic_file_read_iter(iocb, to);
3992
3993         /*
3994          * We need to hold the sem to be sure nobody modifies lock list
3995          * with a brlock that prevents reading.
3996          */
3997         down_read(&cinode->lock_sem);
3998         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3999                                      tcon->ses->server->vals->shared_lock_type,
4000                                      0, NULL, CIFS_READ_OP))
4001                 rc = generic_file_read_iter(iocb, to);
4002         up_read(&cinode->lock_sem);
4003         return rc;
4004 }
4005
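/*
 * Legacy synchronous read path: loop issuing blocking sync_read calls of at
 * most rsize bytes each until read_size bytes have been read or the server
 * returns an error or a zero-length read.
 */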
4006 static ssize_t
4007 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4008 {
4009         int rc = -EACCES;
4010         unsigned int bytes_read = 0;
4011         unsigned int total_read;
4012         unsigned int current_read_size;
4013         unsigned int rsize;
4014         struct cifs_sb_info *cifs_sb;
4015         struct cifs_tcon *tcon;
4016         struct TCP_Server_Info *server;
4017         unsigned int xid;
4018         char *cur_offset;
4019         struct cifsFileInfo *open_file;
4020         struct cifs_io_parms io_parms;
4021         int buf_type = CIFS_NO_BUFFER;
4022         __u32 pid;
4023
4024         xid = get_xid();
4025         cifs_sb = CIFS_FILE_SB(file);
4026
4027         /* FIXME: set up handlers for larger reads and/or convert to async */
4028         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4029
4030         if (file->private_data == NULL) {
4031                 rc = -EBADF;
4032                 free_xid(xid);
4033                 return rc;
4034         }
4035         open_file = file->private_data;
4036         tcon = tlink_tcon(open_file->tlink);
4037         server = tcon->ses->server;
4038
4039         if (!server->ops->sync_read) {
4040                 free_xid(xid);
4041                 return -ENOSYS;
4042         }
4043
4044         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4045                 pid = open_file->pid;
4046         else
4047                 pid = current->tgid;
4048
4049         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4050                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4051
4052         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4053              total_read += bytes_read, cur_offset += bytes_read) {
4054                 do {
4055                         current_read_size = min_t(uint, read_size - total_read,
4056                                                   rsize);
4057                         /*
4058                          * For Windows ME and 9x we do not want to request
4059                          * more than the server negotiated, since it will
4060                          * refuse the read otherwise.
4061                          */
4062                         if ((tcon->ses) && !(tcon->ses->capabilities &
4063                                 tcon->ses->server->vals->cap_large_files)) {
4064                                 current_read_size = min_t(uint,
4065                                         current_read_size, CIFSMaxBufSize);
4066                         }
4067                         if (open_file->invalidHandle) {
4068                                 rc = cifs_reopen_file(open_file, true);
4069                                 if (rc != 0)
4070                                         break;
4071                         }
4072                         io_parms.pid = pid;
4073                         io_parms.tcon = tcon;
4074                         io_parms.offset = *offset;
4075                         io_parms.length = current_read_size;
4076                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4077                                                     &bytes_read, &cur_offset,
4078                                                     &buf_type);
4079                 } while (rc == -EAGAIN);
4080
4081                 if (rc || (bytes_read == 0)) {
4082                         if (total_read) {
4083                                 break;
4084                         } else {
4085                                 free_xid(xid);
4086                                 return rc;
4087                         }
4088                 } else {
4089                         cifs_stats_bytes_read(tcon, total_read);
4090                         *offset += bytes_read;
4091                 }
4092         }
4093         free_xid(xid);
4094         return total_read;
4095 }
4096
4097 /*
4098  * If the page is mmap'ed into a process' page tables, then we need to make
4099  * sure that it doesn't change while being written back.
4100  */
4101 static vm_fault_t
4102 cifs_page_mkwrite(struct vm_fault *vmf)
4103 {
4104         struct page *page = vmf->page;
4105
4106         lock_page(page);
4107         return VM_FAULT_LOCKED;
4108 }
4109
4110 static const struct vm_operations_struct cifs_file_vm_ops = {
4111         .fault = filemap_fault,
4112         .map_pages = filemap_map_pages,
4113         .page_mkwrite = cifs_page_mkwrite,
4114 };
4115
4116 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4117 {
4118         int xid, rc = 0;
4119         struct inode *inode = file_inode(file);
4120
4121         xid = get_xid();
4122
4123         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4124                 rc = cifs_zap_mapping(inode);
4125         if (!rc)
4126                 rc = generic_file_mmap(file, vma);
4127         if (!rc)
4128                 vma->vm_ops = &cifs_file_vm_ops;
4129
4130         free_xid(xid);
4131         return rc;
4132 }
4133
4134 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4135 {
4136         int rc, xid;
4137
4138         xid = get_xid();
4139
4140         rc = cifs_revalidate_file(file);
4141         if (rc)
4142                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4143                          rc);
4144         if (!rc)
4145                 rc = generic_file_mmap(file, vma);
4146         if (!rc)
4147                 vma->vm_ops = &cifs_file_vm_ops;
4148
4149         free_xid(xid);
4150         return rc;
4151 }
4152
4153 static void
4154 cifs_readv_complete(struct work_struct *work)
4155 {
4156         unsigned int i, got_bytes;
4157         struct cifs_readdata *rdata = container_of(work,
4158                                                 struct cifs_readdata, work);
4159
4160         got_bytes = rdata->got_bytes;
4161         for (i = 0; i < rdata->nr_pages; i++) {
4162                 struct page *page = rdata->pages[i];
4163
4164                 lru_cache_add_file(page);
4165
4166                 if (rdata->result == 0 ||
4167                     (rdata->result == -EAGAIN && got_bytes)) {
4168                         flush_dcache_page(page);
4169                         SetPageUptodate(page);
4170                 }
4171
4172                 unlock_page(page);
4173
4174                 if (rdata->result == 0 ||
4175                     (rdata->result == -EAGAIN && got_bytes))
4176                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4177
4178                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4179
4180                 put_page(page);
4181                 rdata->pages[i] = NULL;
4182         }
4183         kref_put(&rdata->refcount, cifs_readdata_release);
4184 }
4185
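/*
 * Like uncached_fill_pages(), but for the readpages path: once the response
 * data is exhausted, pages beyond the server's (probable) EOF are zero-filled
 * rather than read. Illustrative numbers only (assuming PAGE_SHIFT == 12):
 * with eof == 10000, eof_index == (10000 - 1) >> 12 == 2, so pages with
 * index > 2 get zeroed once len reaches 0.
 */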
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

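/*
 * Wrappers with the signatures expected for the read_into_pages and
 * copy_into_pages callbacks of a cifs_readdata.
 */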
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}

static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}

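/*
 * Peel a run of index-contiguous pages off the tail of @page_list, add
 * them to the page cache and collect them on @tmplist for a single read
 * request of at most @rsize bytes. The resulting file offset, byte count
 * and page count are returned through @offset, @bytes and @nr_pages.
 */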
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}

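/*
 * The ->readpages() address_space operation: satisfy as much of the
 * readahead as possible from fscache, then batch the remaining pages into
 * contiguous runs and issue an async read per run.
 */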
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative.
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at the end of the list and move it to the
	 * private list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * rdata->pages, we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/*
	 * Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}

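/*
 * The ->readpage() address_space operation: read a single page, from
 * fscache if it is cached there, synchronously from the server otherwise.
 */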
static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

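/*
 * Return 1 if at least one handle open against this inode has FMODE_WRITE
 * set, 0 otherwise.
 */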
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}

/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing refreshing the inode only on increases
 * in the file size, but this is tricky to do without racing with writebehind
 * page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/*
			 * since there is no page cache to corrupt on
			 * directio, we can change the size safely
			 */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

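/*
 * The ->write_begin() address_space operation: grab the page the copy will
 * land in and, unless the write covers the whole page, bring the parts we
 * are not overwriting up to date, either by zero-filling them (when we
 * hold a read oplock and the page lies at or beyond EOF) or by reading
 * the page in.
 */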
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/*
		 * we could try using another file handle if there is one,
		 * but how would we lock it to prevent a close of that handle
		 * racing with this read? In any case this will be written
		 * out by write_end so it is fine.
		 */
	}
out:
	*pagep = page;
	return rc;
}

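/*
 * The ->releasepage() address_space operation: refuse if the page carries
 * private data, otherwise let fscache decide whether it may be released.
 */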
static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

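/*
 * The ->invalidatepage() address_space operation: when the whole page is
 * invalidated, make fscache forget about it as well.
 */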
static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

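/*
 * The ->launder_page() address_space operation: synchronously write a
 * dirty page back to the server before it is released, and drop it from
 * fscache.
 */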
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

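/*
 * Work handler for an oplock break from the server: wait for pending
 * writers, downgrade the cached oplock state, flush (and, when the break
 * requires it, purge) the page cache, push cached byte-range locks to the
 * server and finally acknowledge the break unless it was cancelled.
 */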
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * Releasing a stale oplock after a recent reconnect of the SMB
	 * session (using a now incorrect file handle) is not a data
	 * integrity issue, but do not bother sending an oplock release if
	 * the session to the server is still disconnected, since the server
	 * has already released the oplock in that case.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}

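/*
 * Address space operations for the common case, where the negotiated
 * buffer size is large enough for cifs_readpages() (see the comment on
 * cifs_addr_ops_smallbuf below).
 */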
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};