/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

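/*
 * Map POSIX open flags (O_RDONLY/O_WRONLY/O_RDWR) to the NT desired access
 * bits requested from the server on open.
 */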
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

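/*
 * Map Linux open flags to the SMB_O_* flags used by the CIFS POSIX
 * extensions open call.
 */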
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

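/*
 * Map O_CREAT/O_EXCL/O_TRUNC combinations to the NT create disposition
 * (see the open flag mapping table further below).
 */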
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

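/*
 * Open a file using the CIFS POSIX extensions. On success, optionally
 * instantiates or refreshes the inode from the returned attributes.
 */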
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

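/*
 * Open a file the traditional (non-POSIX) way via the server's open op and
 * then refresh the inode metadata from the server.
 */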
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is not a direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists);
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

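/*
 * Return true if any open instance of the inode holds cached byte-range
 * (mandatory brlock) locks.
 */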
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

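/*
 * Allocate and initialize the per-open cifsFileInfo, attach it to the tcon
 * and inode open-file lists, and apply the oplock granted by the server.
 */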
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

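/*
 * Take a reference on the file private data with the proper lock held.
 */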
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

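/*
 * VFS open: try a POSIX-extensions open first when the server supports it,
 * otherwise fall back to the traditional NT-style open.
 */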
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

527
528         if (server->ops->get_lease_key)
529                 server->ops->get_lease_key(inode, &fid);
530
531         cifs_add_pending_open(&fid, tlink, &open);
532
533         if (!posix_open_ok) {
534                 if (server->ops->get_lease_key)
535                         server->ops->get_lease_key(inode, &fid);
536
537                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
538                                   file->f_flags, &oplock, &fid, xid);
539                 if (rc) {
540                         cifs_del_pending_open(&open);
541                         goto out;
542                 }
543         }
544
545         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
546         if (cfile == NULL) {
547                 if (server->ops->close)
548                         server->ops->close(xid, tcon, &fid);
549                 cifs_del_pending_open(&open);
550                 rc = -ENOMEM;
551                 goto out;
552         }
553
554         cifs_fscache_set_inode_cookie(inode, file);
555
556         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
557                 /*
558                  * Time to set mode which we can not set earlier due to
559                  * problems creating new read-only files.
560                  */
561                 struct cifs_unix_set_info_args args = {
562                         .mode   = inode->i_mode,
563                         .uid    = INVALID_UID, /* no change */
564                         .gid    = INVALID_GID, /* no change */
565                         .ctime  = NO_CHANGE_64,
566                         .atime  = NO_CHANGE_64,
567                         .mtime  = NO_CHANGE_64,
568                         .device = 0,
569                 };
570                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
571                                        cfile->pid);
572         }
573
574 out:
575         kfree(full_path);
576         free_xid(xid);
577         cifs_put_tlink(tlink);
578         return rc;
579 }
580
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

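/*
 * Reopen a file whose handle was invalidated (e.g. after a reconnect);
 * optionally flush dirty pages and refresh inode info when safe to do so.
 */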
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * some that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here. We
         * can never tell whether the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

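/*
 * Walk all open files on the tree connection and reopen any whose handles
 * were invalidated, retrying later if some reopen fails.
 */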
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

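/*
 * VFS release for directories: close the search handle on the server if
 * needed and free the cached readdir buffers.
 */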
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

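/*
 * Allocate and initialize a byte-range lock record for the given range.
 */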
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

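/*
 * Wake up all waiters blocked on the given lock and unlink them from its
 * blocked list.
 */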
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

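/*
 * Check every open instance of the inode for a lock conflicting with the
 * given range; the caller must hold lock_sem.
 */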
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

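/*
 * Send all cached byte-range locks for this open file to the server,
 * batching as many ranges per LOCKING_ANDX request as the server's buffer
 * size allows.
 */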
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

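/*
 * Send all cached POSIX (fcntl) locks on the inode to the server using the
 * CIFS POSIX extensions lock call.
 */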
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated entry */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

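/*
 * Decode a file_lock into the server lock type and whether this is a lock,
 * an unlock, and/or a blocking request.
 */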
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

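/*
 * Handle F_GETLK: test whether the given range could be locked, first
 * against cached locks and then, if necessary, against the server.
 */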
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

1434 void
1435 cifs_move_llist(struct list_head *source, struct list_head *dest)
1436 {
1437         struct list_head *li, *tmp;
1438         list_for_each_safe(li, tmp, source)
1439                 list_move(li, dest);
1440 }
1441
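/* Wake any waiters on each lock element in the list, then free it. */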
1442 void
1443 cifs_free_llist(struct list_head *llist)
1444 {
1445         struct cifsLockInfo *li, *tmp;
1446         list_for_each_entry_safe(li, tmp, llist, llist) {
1447                 cifs_del_lock_waiters(li);
1448                 list_del(&li->llist);
1449                 kfree(li);
1450         }
1451 }
1452
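/*
 * Unlock a range: walk the file's cached lock list once per lock type,
 * batching matching elements into LOCKING_ANDX_RANGE arrays bounded by the
 * server's maxBuf and sending each batch via cifs_lockv. Elements are parked
 * on a temporary list so they can be restored if a request fails.
 */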
1453 int
1454 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1455                   unsigned int xid)
1456 {
1457         int rc = 0, stored_rc;
1458         static const int types[] = {
1459                 LOCKING_ANDX_LARGE_FILES,
1460                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1461         };
1462         unsigned int i;
1463         unsigned int max_num, num, max_buf;
1464         LOCKING_ANDX_RANGE *buf, *cur;
1465         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1466         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1467         struct cifsLockInfo *li, *tmp;
1468         __u64 length = 1 + flock->fl_end - flock->fl_start;
1469         struct list_head tmp_llist;
1470
1471         INIT_LIST_HEAD(&tmp_llist);
1472
1473         /*
1474          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1475          * and check it for zero before using.
1476          */
1477         max_buf = tcon->ses->server->maxBuf;
1478         if (!max_buf)
1479                 return -EINVAL;
1480
1481         max_num = (max_buf - sizeof(struct smb_hdr)) /
1482                                                 sizeof(LOCKING_ANDX_RANGE);
1483         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1484         if (!buf)
1485                 return -ENOMEM;
1486
1487         down_write(&cinode->lock_sem);
1488         for (i = 0; i < 2; i++) {
1489                 cur = buf;
1490                 num = 0;
1491                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1492                         if (flock->fl_start > li->offset ||
1493                             (flock->fl_start + length) <
1494                             (li->offset + li->length))
1495                                 continue;
1496                         if (current->tgid != li->pid)
1497                                 continue;
1498                         if (types[i] != li->type)
1499                                 continue;
1500                         if (cinode->can_cache_brlcks) {
1501                                 /*
1502                                  * We can cache brlock requests - simply remove
1503                                  * a lock from the file's list.
1504                                  */
1505                                 list_del(&li->llist);
1506                                 cifs_del_lock_waiters(li);
1507                                 kfree(li);
1508                                 continue;
1509                         }
1510                         cur->Pid = cpu_to_le16(li->pid);
1511                         cur->LengthLow = cpu_to_le32((u32)li->length);
1512                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1513                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1514                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1515                         /*
1516                          * We need to save a lock here to let us add it again to
1517                          * the file's list if the unlock range request fails on
1518                          * the server.
1519                          */
1520                         list_move(&li->llist, &tmp_llist);
1521                         if (++num == max_num) {
1522                                 stored_rc = cifs_lockv(xid, tcon,
1523                                                        cfile->fid.netfid,
1524                                                        li->type, num, 0, buf);
1525                                 if (stored_rc) {
1526                                         /*
1527                                          * We failed on the unlock range
1528                                          * request - add all locks from the tmp
1529                                          * list to the head of the file's list.
1530                                          */
1531                                         cifs_move_llist(&tmp_llist,
1532                                                         &cfile->llist->locks);
1533                                         rc = stored_rc;
1534                                 } else
1535                                         /*
1536                                          * The unlock range request succeeded -
1537                                          * free the tmp list.
1538                                          */
1539                                         cifs_free_llist(&tmp_llist);
1540                                 cur = buf;
1541                                 num = 0;
1542                         } else
1543                                 cur++;
1544                 }
1545                 if (num) {
1546                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1547                                                types[i], num, 0, buf);
1548                         if (stored_rc) {
1549                                 cifs_move_llist(&tmp_llist,
1550                                                 &cfile->llist->locks);
1551                                 rc = stored_rc;
1552                         } else
1553                                 cifs_free_llist(&tmp_llist);
1554                 }
1555         }
1556
1557         up_write(&cinode->lock_sem);
1558         kfree(buf);
1559         return rc;
1560 }
1561
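/*
 * Handle F_SETLK/F_SETLKW: use the POSIX lock path when available;
 * otherwise record and send a mandatory byte-range lock (checking for
 * local conflicts first) or push an unlock of the range to the server.
 */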
1562 static int
1563 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1564            bool wait_flag, bool posix_lck, int lock, int unlock,
1565            unsigned int xid)
1566 {
1567         int rc = 0;
1568         __u64 length = 1 + flock->fl_end - flock->fl_start;
1569         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1570         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1571         struct TCP_Server_Info *server = tcon->ses->server;
1572         struct inode *inode = d_inode(cfile->dentry);
1573
1574         if (posix_lck) {
1575                 int posix_lock_type;
1576
1577                 rc = cifs_posix_lock_set(file, flock);
1578                 if (rc <= 0)
1579                         return rc;
1580
1581                 if (type & server->vals->shared_lock_type)
1582                         posix_lock_type = CIFS_RDLCK;
1583                 else
1584                         posix_lock_type = CIFS_WRLCK;
1585
1586                 if (unlock == 1)
1587                         posix_lock_type = CIFS_UNLCK;
1588
1589                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1590                                       hash_lockowner(flock->fl_owner),
1591                                       flock->fl_start, length,
1592                                       NULL, posix_lock_type, wait_flag);
1593                 goto out;
1594         }
1595
1596         if (lock) {
1597                 struct cifsLockInfo *lock;
1598
1599                 lock = cifs_lock_init(flock->fl_start, length, type,
1600                                       flock->fl_flags);
1601                 if (!lock)
1602                         return -ENOMEM;
1603
1604                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1605                 if (rc < 0) {
1606                         kfree(lock);
1607                         return rc;
1608                 }
1609                 if (!rc)
1610                         goto out;
1611
1612                 /*
1613                  * A Windows 7 server can delay breaking a lease from read to None
1614                  * if we set a byte-range lock on a file - break it explicitly
1615                  * before sending the lock to the server to be sure the next
1616                  * read won't conflict with non-overlapped locks due to
1617                  * page reading.
1618                  */
1619                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1620                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1621                         cifs_zap_mapping(inode);
1622                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1623                                  inode);
1624                         CIFS_I(inode)->oplock = 0;
1625                 }
1626
1627                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1628                                             type, 1, 0, wait_flag);
1629                 if (rc) {
1630                         kfree(lock);
1631                         return rc;
1632                 }
1633
1634                 cifs_lock_add(cfile, lock);
1635         } else if (unlock)
1636                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1637
1638 out:
1639         if (flock->fl_flags & FL_POSIX && !rc)
1640                 rc = locks_lock_file_wait(file, flock);
1641         return rc;
1642 }
1643
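/*
 * VFS ->lock entry point: parse the file_lock, decide between POSIX and
 * mandatory semantics, and dispatch to cifs_getlk or cifs_setlk.
 */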
1644 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1645 {
1646         int rc, xid;
1647         int lock = 0, unlock = 0;
1648         bool wait_flag = false;
1649         bool posix_lck = false;
1650         struct cifs_sb_info *cifs_sb;
1651         struct cifs_tcon *tcon;
1652         struct cifsInodeInfo *cinode;
1653         struct cifsFileInfo *cfile;
1654         __u16 netfid;
1655         __u32 type;
1656
1657         rc = -EACCES;
1658         xid = get_xid();
1659
1660         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1661                  cmd, flock->fl_flags, flock->fl_type,
1662                  flock->fl_start, flock->fl_end);
1663
1664         cfile = (struct cifsFileInfo *)file->private_data;
1665         tcon = tlink_tcon(cfile->tlink);
1666
1667         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1668                         tcon->ses->server);
1669         cifs_sb = CIFS_FILE_SB(file);
1670         netfid = cfile->fid.netfid;
1671         cinode = CIFS_I(file_inode(file));
1672
1673         if (cap_unix(tcon->ses) &&
1674             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1675             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1676                 posix_lck = true;
1677         /*
1678          * BB add code here to normalize offset and length to account for
1679          * negative length which we cannot accept over the wire.
1680          */
1681         if (IS_GETLK(cmd)) {
1682                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1683                 free_xid(xid);
1684                 return rc;
1685         }
1686
1687         if (!lock && !unlock) {
1688                 /*
1689                  * if this is neither a lock nor an unlock request, there is
1690                  * nothing to do since we do not know what it is
1691                  */
1692                 free_xid(xid);
1693                 return -EOPNOTSUPP;
1694         }
1695
1696         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1697                         xid);
1698         free_xid(xid);
1699         return rc;
1700 }
1701
1702 /*
1703  * update the file size (if needed) after a write. Should be called with
1704  * the inode->i_lock held
1705  */
1706 void
1707 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1708                       unsigned int bytes_written)
1709 {
1710         loff_t end_of_write = offset + bytes_written;
1711
1712         if (end_of_write > cifsi->server_eof)
1713                 cifsi->server_eof = end_of_write;
1714 }
1715
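/*
 * Write write_size bytes at *offset synchronously, retrying on -EAGAIN and
 * reopening an invalidated handle as needed; updates the cached server EOF
 * and i_size and returns the number of bytes written.
 */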
1716 static ssize_t
1717 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1718            size_t write_size, loff_t *offset)
1719 {
1720         int rc = 0;
1721         unsigned int bytes_written = 0;
1722         unsigned int total_written;
1723         struct cifs_sb_info *cifs_sb;
1724         struct cifs_tcon *tcon;
1725         struct TCP_Server_Info *server;
1726         unsigned int xid;
1727         struct dentry *dentry = open_file->dentry;
1728         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1729         struct cifs_io_parms io_parms;
1730
1731         cifs_sb = CIFS_SB(dentry->d_sb);
1732
1733         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1734                  write_size, *offset, dentry);
1735
1736         tcon = tlink_tcon(open_file->tlink);
1737         server = tcon->ses->server;
1738
1739         if (!server->ops->sync_write)
1740                 return -ENOSYS;
1741
1742         xid = get_xid();
1743
1744         for (total_written = 0; write_size > total_written;
1745              total_written += bytes_written) {
1746                 rc = -EAGAIN;
1747                 while (rc == -EAGAIN) {
1748                         struct kvec iov[2];
1749                         unsigned int len;
1750
1751                         if (open_file->invalidHandle) {
1752                                 /* we could deadlock if we called
1753                                    filemap_fdatawait from here so tell
1754                                    reopen_file not to flush data to
1755                                    the server now */
1756                                 rc = cifs_reopen_file(open_file, false);
1757                                 if (rc != 0)
1758                                         break;
1759                         }
1760
1761                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1762                                   (unsigned int)write_size - total_written);
1763                         /* iov[0] is reserved for smb header */
1764                         iov[1].iov_base = (char *)write_data + total_written;
1765                         iov[1].iov_len = len;
1766                         io_parms.pid = pid;
1767                         io_parms.tcon = tcon;
1768                         io_parms.offset = *offset;
1769                         io_parms.length = len;
1770                         rc = server->ops->sync_write(xid, &open_file->fid,
1771                                         &io_parms, &bytes_written, iov, 1);
1772                 }
1773                 if (rc || (bytes_written == 0)) {
1774                         if (total_written)
1775                                 break;
1776                         else {
1777                                 free_xid(xid);
1778                                 return rc;
1779                         }
1780                 } else {
1781                         spin_lock(&d_inode(dentry)->i_lock);
1782                         cifs_update_eof(cifsi, *offset, bytes_written);
1783                         spin_unlock(&d_inode(dentry)->i_lock);
1784                         *offset += bytes_written;
1785                 }
1786         }
1787
1788         cifs_stats_bytes_written(tcon, total_written);
1789
1790         if (total_written > 0) {
1791                 spin_lock(&d_inode(dentry)->i_lock);
1792                 if (*offset > d_inode(dentry)->i_size)
1793                         i_size_write(d_inode(dentry), *offset);
1794                 spin_unlock(&d_inode(dentry)->i_lock);
1795         }
1796         mark_inode_dirty_sync(d_inode(dentry));
1797         free_xid(xid);
1798         return total_written;
1799 }
1800
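/*
 * Find an open handle on this inode with read access, optionally limited
 * to handles owned by the current fsuid, and take a reference on it.
 */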
1801 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1802                                         bool fsuid_only)
1803 {
1804         struct cifsFileInfo *open_file = NULL;
1805         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1806         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1807
1808         /* only filter by fsuid on multiuser mounts */
1809         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1810                 fsuid_only = false;
1811
1812         spin_lock(&tcon->open_file_lock);
1813         /* we could simply take the first list entry since write-only entries
1814            are always at the end of the list, but because the first entry might
1815            have a close pending, we go through the whole list */
1816         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1817                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1818                         continue;
1819                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1820                         if (!open_file->invalidHandle) {
1821                                 /* found a good file */
1822                                 /* lock it so it will not be closed on us */
1823                                 cifsFileInfo_get(open_file);
1824                                 spin_unlock(&tcon->open_file_lock);
1825                                 return open_file;
1826                         } /* else might as well continue, and look for
1827                              another, or simply have the caller reopen it
1828                              again rather than trying to fix this handle */
1829                 } else /* write only file */
1830                         break; /* write only files are last so must be done */
1831         }
1832         spin_unlock(&tcon->open_file_lock);
1833         return NULL;
1834 }
1835
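/*
 * Find an open handle on this inode with write access, preferring a valid
 * handle owned by the current task; as a last resort an invalidated handle
 * is reopened, retrying up to MAX_REOPEN_ATT times.
 */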
1836 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1837                                         bool fsuid_only)
1838 {
1839         struct cifsFileInfo *open_file, *inv_file = NULL;
1840         struct cifs_sb_info *cifs_sb;
1841         struct cifs_tcon *tcon;
1842         bool any_available = false;
1843         int rc;
1844         unsigned int refind = 0;
1845
1846         /* Having a null inode here (because mapping->host was set to zero by
1847         the VFS or MM) should not happen but we had reports of an oops (due to
1848         it being zero) during stress test cases so we need to check for it */
1849
1850         if (cifs_inode == NULL) {
1851                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1852                 dump_stack();
1853                 return NULL;
1854         }
1855
1856         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1857         tcon = cifs_sb_master_tcon(cifs_sb);
1858
1859         /* only filter by fsuid on multiuser mounts */
1860         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1861                 fsuid_only = false;
1862
1863         spin_lock(&tcon->open_file_lock);
1864 refind_writable:
1865         if (refind > MAX_REOPEN_ATT) {
1866                 spin_unlock(&tcon->open_file_lock);
1867                 return NULL;
1868         }
1869         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1870                 if (!any_available && open_file->pid != current->tgid)
1871                         continue;
1872                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1873                         continue;
1874                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1875                         if (!open_file->invalidHandle) {
1876                                 /* found a good writable file */
1877                                 cifsFileInfo_get(open_file);
1878                                 spin_unlock(&tcon->open_file_lock);
1879                                 return open_file;
1880                         } else {
1881                                 if (!inv_file)
1882                                         inv_file = open_file;
1883                         }
1884                 }
1885         }
1886         /* couldn't find usable FH with same pid, try any available */
1887         if (!any_available) {
1888                 any_available = true;
1889                 goto refind_writable;
1890         }
1891
1892         if (inv_file) {
1893                 any_available = false;
1894                 cifsFileInfo_get(inv_file);
1895         }
1896
1897         spin_unlock(&tcon->open_file_lock);
1898
1899         if (inv_file) {
1900                 rc = cifs_reopen_file(inv_file, false);
1901                 if (!rc)
1902                         return inv_file;
1903                 else {
1904                         spin_lock(&tcon->open_file_lock);
1905                         list_move_tail(&inv_file->flist,
1906                                         &cifs_inode->openFileList);
1907                         spin_unlock(&tcon->open_file_lock);
1908                         cifsFileInfo_put(inv_file);
1909                         ++refind;
1910                         inv_file = NULL;
1911                         spin_lock(&tcon->open_file_lock);
1912                         goto refind_writable;
1913                 }
1914         }
1915
1916         return NULL;
1917 }
1918
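/*
 * Write the byte range [from, to) of a cached page back to the server
 * through any writable handle; the range is clamped so the file is never
 * extended from here.
 */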
1919 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1920 {
1921         struct address_space *mapping = page->mapping;
1922         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1923         char *write_data;
1924         int rc = -EFAULT;
1925         int bytes_written = 0;
1926         struct inode *inode;
1927         struct cifsFileInfo *open_file;
1928
1929         if (!mapping || !mapping->host)
1930                 return -EFAULT;
1931
1932         inode = page->mapping->host;
1933
1934         offset += (loff_t)from;
1935         write_data = kmap(page);
1936         write_data += from;
1937
1938         if ((to > PAGE_SIZE) || (from > to)) {
1939                 kunmap(page);
1940                 return -EIO;
1941         }
1942
1943         /* racing with truncate? */
1944         if (offset > mapping->host->i_size) {
1945                 kunmap(page);
1946                 return 0; /* don't care */
1947         }
1948
1949         /* check to make sure that we are not extending the file */
1950         if (mapping->host->i_size - offset < (loff_t)to)
1951                 to = (unsigned)(mapping->host->i_size - offset);
1952
1953         open_file = find_writable_file(CIFS_I(mapping->host), false);
1954         if (open_file) {
1955                 bytes_written = cifs_write(open_file, open_file->pid,
1956                                            write_data, to - from, &offset);
1957                 cifsFileInfo_put(open_file);
1958                 /* Does mm or vfs already set times? */
1959                 inode->i_atime = inode->i_mtime = current_time(inode);
1960                 if ((bytes_written > 0) && (offset))
1961                         rc = 0;
1962                 else if (bytes_written < 0)
1963                         rc = bytes_written;
1964         } else {
1965                 cifs_dbg(FYI, "No writable filehandles for inode\n");
1966                 rc = -EIO;
1967         }
1968
1969         kunmap(page);
1970         return rc;
1971 }
1972
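/*
 * Allocate a writedata structure and fill it with up to tofind dirty pages
 * found in the given range of the mapping.
 */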
1973 static struct cifs_writedata *
1974 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1975                           pgoff_t end, pgoff_t *index,
1976                           unsigned int *found_pages)
1977 {
1978         struct cifs_writedata *wdata;
1979
1980         wdata = cifs_writedata_alloc((unsigned int)tofind,
1981                                      cifs_writev_complete);
1982         if (!wdata)
1983                 return NULL;
1984
1985         *found_pages = find_get_pages_range_tag(mapping, index, end,
1986                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1987         return wdata;
1988 }
1989
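/*
 * From the pages found above, lock and tag for writeback a run of
 * consecutive dirty pages, putting back any page that no longer qualifies;
 * returns how many pages remain in wdata.
 */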
1990 static unsigned int
1991 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1992                     struct address_space *mapping,
1993                     struct writeback_control *wbc,
1994                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1995 {
1996         unsigned int nr_pages = 0, i;
1997         struct page *page;
1998
1999         for (i = 0; i < found_pages; i++) {
2000                 page = wdata->pages[i];
2001                 /*
2002                  * At this point we hold neither the i_pages lock nor the
2003                  * page lock: the page may be truncated or invalidated
2004                  * (changing page->mapping to NULL), or even swizzled
2005                  * back from swapper_space to tmpfs file mapping
2006                  */
2007
2008                 if (nr_pages == 0)
2009                         lock_page(page);
2010                 else if (!trylock_page(page))
2011                         break;
2012
2013                 if (unlikely(page->mapping != mapping)) {
2014                         unlock_page(page);
2015                         break;
2016                 }
2017
2018                 if (!wbc->range_cyclic && page->index > end) {
2019                         *done = true;
2020                         unlock_page(page);
2021                         break;
2022                 }
2023
2024                 if (*next && (page->index != *next)) {
2025                         /* Not next consecutive page */
2026                         unlock_page(page);
2027                         break;
2028                 }
2029
2030                 if (wbc->sync_mode != WB_SYNC_NONE)
2031                         wait_on_page_writeback(page);
2032
2033                 if (PageWriteback(page) ||
2034                                 !clear_page_dirty_for_io(page)) {
2035                         unlock_page(page);
2036                         break;
2037                 }
2038
2039                 /*
2040                  * This actually clears the dirty bit in the radix tree.
2041                  * See cifs_writepage() for more commentary.
2042                  */
2043                 set_page_writeback(page);
2044                 if (page_offset(page) >= i_size_read(mapping->host)) {
2045                         *done = true;
2046                         unlock_page(page);
2047                         end_page_writeback(page);
2048                         break;
2049                 }
2050
2051                 wdata->pages[i] = page;
2052                 *next = page->index + 1;
2053                 ++nr_pages;
2054         }
2055
2056         /* reset index to refind any pages skipped */
2057         if (nr_pages == 0)
2058                 *index = wdata->pages[0]->index + 1;
2059
2060         /* put any pages we aren't going to use */
2061         for (i = nr_pages; i < found_pages; i++) {
2062                 put_page(wdata->pages[i]);
2063                 wdata->pages[i] = NULL;
2064         }
2065
2066         return nr_pages;
2067 }
2068
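/*
 * Fill in the remaining wdata fields (offset, page sizes, target handle)
 * and issue the asynchronous write, unlocking all pages before returning.
 */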
2069 static int
2070 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2071                  struct address_space *mapping, struct writeback_control *wbc)
2072 {
2073         int rc = 0;
2074         struct TCP_Server_Info *server;
2075         unsigned int i;
2076
2077         wdata->sync_mode = wbc->sync_mode;
2078         wdata->nr_pages = nr_pages;
2079         wdata->offset = page_offset(wdata->pages[0]);
2080         wdata->pagesz = PAGE_SIZE;
2081         wdata->tailsz = min(i_size_read(mapping->host) -
2082                         page_offset(wdata->pages[nr_pages - 1]),
2083                         (loff_t)PAGE_SIZE);
2084         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2085
2086         if (wdata->cfile != NULL)
2087                 cifsFileInfo_put(wdata->cfile);
2088         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2089         if (!wdata->cfile) {
2090                 cifs_dbg(VFS, "No writable handles for inode\n");
2091                 rc = -EBADF;
2092         } else {
2093                 wdata->pid = wdata->cfile->pid;
2094                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2095                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2096         }
2097
2098         for (i = 0; i < nr_pages; ++i)
2099                 unlock_page(wdata->pages[i]);
2100
2101         return rc;
2102 }
2103
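/*
 * Address-space ->writepages: gather runs of dirty pages bounded by the
 * negotiated wsize and available credits and send each run as a single
 * asynchronous write; small wsize falls back to generic_writepages.
 */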
2104 static int cifs_writepages(struct address_space *mapping,
2105                            struct writeback_control *wbc)
2106 {
2107         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2108         struct TCP_Server_Info *server;
2109         bool done = false, scanned = false, range_whole = false;
2110         pgoff_t end, index;
2111         struct cifs_writedata *wdata;
2112         int rc = 0;
2113         unsigned int xid;
2114
2115         /*
2116          * If wsize is smaller than the page cache size, default to writing
2117          * one page at a time via cifs_writepage
2118          */
2119         if (cifs_sb->wsize < PAGE_SIZE)
2120                 return generic_writepages(mapping, wbc);
2121
2122         xid = get_xid();
2123         if (wbc->range_cyclic) {
2124                 index = mapping->writeback_index; /* Start from prev offset */
2125                 end = -1;
2126         } else {
2127                 index = wbc->range_start >> PAGE_SHIFT;
2128                 end = wbc->range_end >> PAGE_SHIFT;
2129                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2130                         range_whole = true;
2131                 scanned = true;
2132         }
2133         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2134 retry:
2135         while (!done && index <= end) {
2136                 unsigned int i, nr_pages, found_pages, wsize, credits;
2137                 pgoff_t next = 0, tofind, saved_index = index;
2138
2139                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2140                                                    &wsize, &credits);
2141                 if (rc)
2142                         break;
2143
2144                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2145
2146                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2147                                                   &found_pages);
2148                 if (!wdata) {
2149                         rc = -ENOMEM;
2150                         add_credits_and_wake_if(server, credits, 0);
2151                         break;
2152                 }
2153
2154                 if (found_pages == 0) {
2155                         kref_put(&wdata->refcount, cifs_writedata_release);
2156                         add_credits_and_wake_if(server, credits, 0);
2157                         break;
2158                 }
2159
2160                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2161                                                end, &index, &next, &done);
2162
2163                 /* nothing to write? */
2164                 if (nr_pages == 0) {
2165                         kref_put(&wdata->refcount, cifs_writedata_release);
2166                         add_credits_and_wake_if(server, credits, 0);
2167                         continue;
2168                 }
2169
2170                 wdata->credits = credits;
2171
2172                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2173
2174                 /* send failure -- clean up the mess */
2175                 if (rc != 0) {
2176                         add_credits_and_wake_if(server, wdata->credits, 0);
2177                         for (i = 0; i < nr_pages; ++i) {
2178                                 if (rc == -EAGAIN)
2179                                         redirty_page_for_writepage(wbc,
2180                                                            wdata->pages[i]);
2181                                 else
2182                                         SetPageError(wdata->pages[i]);
2183                                 end_page_writeback(wdata->pages[i]);
2184                                 put_page(wdata->pages[i]);
2185                         }
2186                         if (rc != -EAGAIN)
2187                                 mapping_set_error(mapping, rc);
2188                 }
2189                 kref_put(&wdata->refcount, cifs_writedata_release);
2190
2191                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2192                         index = saved_index;
2193                         continue;
2194                 }
2195
2196                 wbc->nr_to_write -= nr_pages;
2197                 if (wbc->nr_to_write <= 0)
2198                         done = true;
2199
2200                 index = next;
2201         }
2202
2203         if (!scanned && !done) {
2204                 /*
2205                  * We hit the last page and there is more work to be done: wrap
2206                  * back to the start of the file
2207                  */
2208                 scanned = true;
2209                 index = 0;
2210                 goto retry;
2211         }
2212
2213         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2214                 mapping->writeback_index = index;
2215
2216         free_xid(xid);
2217         return rc;
2218 }
2219
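/*
 * Write one locked page back to the server, retrying on -EAGAIN when the
 * caller requires data integrity (WB_SYNC_ALL).
 */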
2220 static int
2221 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2222 {
2223         int rc;
2224         unsigned int xid;
2225
2226         xid = get_xid();
2227 /* BB add check for wbc flags */
2228         get_page(page);
2229         if (!PageUptodate(page))
2230                 cifs_dbg(FYI, "ppw - page not up to date\n");
2231
2232         /*
2233          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2234          *
2235          * A writepage() implementation always needs to do either this,
2236          * or re-dirty the page with "redirty_page_for_writepage()" in
2237          * the case of a failure.
2238          *
2239          * Just unlocking the page will cause the radix tree tag-bits
2240          * to fail to update with the state of the page correctly.
2241          */
2242         set_page_writeback(page);
2243 retry_write:
2244         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2245         if (rc == -EAGAIN) {
2246                 if (wbc->sync_mode == WB_SYNC_ALL)
2247                         goto retry_write;
2248                 redirty_page_for_writepage(wbc, page);
2249         } else if (rc != 0) {
2250                 SetPageError(page);
2251                 mapping_set_error(page->mapping, rc);
2252         } else {
2253                 SetPageUptodate(page);
2254         }
2255         end_page_writeback(page);
2256         put_page(page);
2257         free_xid(xid);
2258         return rc;
2259 }
2260
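/* ->writepage wrapper that unlocks the page when the write is done */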
2261 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2262 {
2263         int rc = cifs_writepage_locked(page, wbc);
2264         unlock_page(page);
2265         return rc;
2266 }
2267
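/*
 * ->write_end: mark an up-to-date page dirty for later writeback, or, for a
 * page that is not up to date, write the copied bytes to the server
 * immediately; in either case extend i_size if the write went past it.
 */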
2268 static int cifs_write_end(struct file *file, struct address_space *mapping,
2269                         loff_t pos, unsigned len, unsigned copied,
2270                         struct page *page, void *fsdata)
2271 {
2272         int rc;
2273         struct inode *inode = mapping->host;
2274         struct cifsFileInfo *cfile = file->private_data;
2275         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2276         __u32 pid;
2277
2278         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2279                 pid = cfile->pid;
2280         else
2281                 pid = current->tgid;
2282
2283         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2284                  page, pos, copied);
2285
2286         if (PageChecked(page)) {
2287                 if (copied == len)
2288                         SetPageUptodate(page);
2289                 ClearPageChecked(page);
2290         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2291                 SetPageUptodate(page);
2292
2293         if (!PageUptodate(page)) {
2294                 char *page_data;
2295                 unsigned offset = pos & (PAGE_SIZE - 1);
2296                 unsigned int xid;
2297
2298                 xid = get_xid();
2299                 /* this is probably better than directly calling
2300                    cifs_partialpagewrite since here the file handle is
2301                    known, which we might as well leverage */
2302                 /* BB check if anything else missing out of ppw
2303                    such as updating last write time */
2304                 page_data = kmap(page);
2305                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2306                 /* if (rc < 0) should we set writebehind rc? */
2307                 kunmap(page);
2308
2309                 free_xid(xid);
2310         } else {
2311                 rc = copied;
2312                 pos += copied;
2313                 set_page_dirty(page);
2314         }
2315
2316         if (rc > 0) {
2317                 spin_lock(&inode->i_lock);
2318                 if (pos > inode->i_size)
2319                         i_size_write(inode, pos);
2320                 spin_unlock(&inode->i_lock);
2321         }
2322
2323         unlock_page(page);
2324         put_page(page);
2325
2326         return rc;
2327 }
2328
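/*
 * Strict fsync: flush dirty pages, invalidate the page cache if we no
 * longer hold a read lease/oplock, and ask the server to flush its copy.
 */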
2329 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2330                       int datasync)
2331 {
2332         unsigned int xid;
2333         int rc = 0;
2334         struct cifs_tcon *tcon;
2335         struct TCP_Server_Info *server;
2336         struct cifsFileInfo *smbfile = file->private_data;
2337         struct inode *inode = file_inode(file);
2338         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2339
2340         rc = file_write_and_wait_range(file, start, end);
2341         if (rc)
2342                 return rc;
2343         inode_lock(inode);
2344
2345         xid = get_xid();
2346
2347         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2348                  file, datasync);
2349
2350         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2351                 rc = cifs_zap_mapping(inode);
2352                 if (rc) {
2353                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2354                         rc = 0; /* don't care about it in fsync */
2355                 }
2356         }
2357
2358         tcon = tlink_tcon(smbfile->tlink);
2359         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2360                 server = tcon->ses->server;
2361                 if (server->ops->flush)
2362                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2363                 else
2364                         rc = -ENOSYS;
2365         }
2366
2367         free_xid(xid);
2368         inode_unlock(inode);
2369         return rc;
2370 }
2371
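/*
 * Plain fsync: flush dirty pages and ask the server to flush, without
 * invalidating the local page cache.
 */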
2372 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2373 {
2374         unsigned int xid;
2375         int rc = 0;
2376         struct cifs_tcon *tcon;
2377         struct TCP_Server_Info *server;
2378         struct cifsFileInfo *smbfile = file->private_data;
2379         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2380         struct inode *inode = file->f_mapping->host;
2381
2382         rc = file_write_and_wait_range(file, start, end);
2383         if (rc)
2384                 return rc;
2385         inode_lock(inode);
2386
2387         xid = get_xid();
2388
2389         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2390                  file, datasync);
2391
2392         tcon = tlink_tcon(smbfile->tlink);
2393         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2394                 server = tcon->ses->server;
2395                 if (server->ops->flush)
2396                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2397                 else
2398                         rc = -ENOSYS;
2399         }
2400
2401         free_xid(xid);
2402         inode_unlock(inode);
2403         return rc;
2404 }
2405
2406 /*
2407  * As the file closes, flush all cached write data for this inode, checking
2408  * for write-behind errors.
2409  */
2410 int cifs_flush(struct file *file, fl_owner_t id)
2411 {
2412         struct inode *inode = file_inode(file);
2413         int rc = 0;
2414
2415         if (file->f_mode & FMODE_WRITE)
2416                 rc = filemap_write_and_wait(inode->i_mapping);
2417
2418         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2419
2420         return rc;
2421 }
2422
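/* Allocate num_pages pages, releasing any partial allocation on failure. */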
2423 static int
2424 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2425 {
2426         int rc = 0;
2427         unsigned long i;
2428
2429         for (i = 0; i < num_pages; i++) {
2430                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2431                 if (!pages[i]) {
2432                         /*
2433                          * save number of pages we have already allocated and
2434                          * return with ENOMEM error
2435                          */
2436                         num_pages = i;
2437                         rc = -ENOMEM;
2438                         break;
2439                 }
2440         }
2441
2442         if (rc) {
2443                 for (i = 0; i < num_pages; i++)
2444                         put_page(pages[i]);
2445         }
2446         return rc;
2447 }
2448
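/*
 * Return the number of pages needed for min(len, wsize) bytes and,
 * optionally, that clamped length via cur_len.
 */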
2449 static inline
2450 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2451 {
2452         size_t num_pages;
2453         size_t clen;
2454
2455         clen = min_t(const size_t, len, wsize);
2456         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2457
2458         if (cur_len)
2459                 *cur_len = clen;
2460
2461         return num_pages;
2462 }
2463
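/*
 * Release an uncached writedata: drop the aio context reference and the
 * page references before freeing the structure itself.
 */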
2464 static void
2465 cifs_uncached_writedata_release(struct kref *refcount)
2466 {
2467         int i;
2468         struct cifs_writedata *wdata = container_of(refcount,
2469                                         struct cifs_writedata, refcount);
2470
2471         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2472         for (i = 0; i < wdata->nr_pages; i++)
2473                 put_page(wdata->pages[i]);
2474         cifs_writedata_release(refcount);
2475 }
2476
2477 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2478
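/*
 * Completion work for an uncached write: update the cached server EOF and
 * i_size, then hand the result back to the collection routine.
 */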
2479 static void
2480 cifs_uncached_writev_complete(struct work_struct *work)
2481 {
2482         struct cifs_writedata *wdata = container_of(work,
2483                                         struct cifs_writedata, work);
2484         struct inode *inode = d_inode(wdata->cfile->dentry);
2485         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2486
2487         spin_lock(&inode->i_lock);
2488         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2489         if (cifsi->server_eof > inode->i_size)
2490                 i_size_write(inode, cifsi->server_eof);
2491         spin_unlock(&inode->i_lock);
2492
2493         complete(&wdata->done);
2494         collect_uncached_write_data(wdata->ctx);
2495         /* the below call can possibly free the last ref to aio ctx */
2496         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2497 }
2498
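/*
 * Copy up to *len bytes from the user iovec into the wdata pages, trimming
 * *len and *num_pages to what was actually copied; a short copy usually
 * means we hit an unmapped user address.
 */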
2499 static int
2500 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2501                       size_t *len, unsigned long *num_pages)
2502 {
2503         size_t save_len, copied, bytes, cur_len = *len;
2504         unsigned long i, nr_pages = *num_pages;
2505
2506         save_len = cur_len;
2507         for (i = 0; i < nr_pages; i++) {
2508                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2509                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2510                 cur_len -= copied;
2511                 /*
2512                  * If we didn't copy as much as we expected, then that
2513                  * may mean we trod into an unmapped area. Stop copying
2514                  * at that point. On the next pass through the big
2515                  * loop, we'll likely end up getting a zero-length
2516                  * write and bailing out of it.
2517                  */
2518                 if (copied < bytes)
2519                         break;
2520         }
2521         cur_len = save_len - cur_len;
2522         *len = cur_len;
2523
2524         /*
2525          * If we have no data to send, then that probably means that
2526          * the copy above failed altogether. That's most likely because
2527          * the address in the iovec was bogus. Return -EFAULT and let
2528          * the caller free anything we allocated and bail out.
2529          */
2530         if (!cur_len)
2531                 return -EFAULT;
2532
2533         /*
2534          * i + 1 now represents the number of pages we actually used in
2535          * the copy phase above.
2536          */
2537         *num_pages = i + 1;
2538         return 0;
2539 }
2540
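/*
 * Resend a failed wdata in one piece: wait until the server grants enough
 * credits for the full size, reopen the handle if it was invalidated, and
 * requeue the asynchronous write.
 */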
2541 static int
2542 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2543         struct cifs_aio_ctx *ctx)
2544 {
2545         unsigned int wsize, credits;
2546         int rc;
2547         struct TCP_Server_Info *server =
2548                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2549
2550         /*
2551          * Wait for credits to resend this wdata.
2552          * Note: we are attempting to resend the whole wdata, not in segments
2553          */
2554         do {
2555                 rc = server->ops->wait_mtu_credits(
2556                         server, wdata->bytes, &wsize, &credits);
2557
2558                 if (rc)
2559                         goto out;
2560
2561                 if (wsize < wdata->bytes) {
2562                         add_credits_and_wake_if(server, credits, 0);
2563                         msleep(1000);
2564                 }
2565         } while (wsize < wdata->bytes);
2566
2567         rc = -EAGAIN;
2568         while (rc == -EAGAIN) {
2569                 rc = 0;
2570                 if (wdata->cfile->invalidHandle)
2571                         rc = cifs_reopen_file(wdata->cfile, false);
2572                 if (!rc)
2573                         rc = server->ops->async_writev(wdata,
2574                                         cifs_uncached_writedata_release);
2575         }
2576
2577         if (!rc) {
2578                 list_add_tail(&wdata->list, wdata_list);
2579                 return 0;
2580         }
2581
2582         add_credits_and_wake_if(server, wdata->credits, 0);
2583 out:
2584         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2585
2586         return rc;
2587 }
2588
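/*
 * Core of the uncached/direct write path: split the iterator into
 * wsize-bounded chunks, pinning user pages directly for direct I/O or
 * copying into freshly allocated pages otherwise, and queue one
 * asynchronous write per chunk.
 */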
2589 static int
2590 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2591                      struct cifsFileInfo *open_file,
2592                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2593                      struct cifs_aio_ctx *ctx)
2594 {
2595         int rc = 0;
2596         size_t cur_len;
2597         unsigned long nr_pages, num_pages, i;
2598         struct cifs_writedata *wdata;
2599         struct iov_iter saved_from = *from;
2600         loff_t saved_offset = offset;
2601         pid_t pid;
2602         struct TCP_Server_Info *server;
2603         struct page **pagevec;
2604         size_t start;
2605
2606         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2607                 pid = open_file->pid;
2608         else
2609                 pid = current->tgid;
2610
2611         server = tlink_tcon(open_file->tlink)->ses->server;
2612
2613         do {
2614                 unsigned int wsize, credits;
2615
2616                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2617                                                    &wsize, &credits);
2618                 if (rc)
2619                         break;
2620
2621                 cur_len = min_t(const size_t, len, wsize);
2622
2623                 if (ctx->direct_io) {
2624                         ssize_t result;
2625
2626                         result = iov_iter_get_pages_alloc(
2627                                 from, &pagevec, cur_len, &start);
2628                         if (result < 0) {
2629                                 cifs_dbg(VFS,
2630                                         "direct_writev couldn't get user pages "
2631                                         "(rc=%zd) iter type %d iov_offset %zd "
2632                                         "count %zd\n",
2633                                         result, from->type,
2634                                         from->iov_offset, from->count);
2635                                 dump_stack();
2636
2637                                 rc = result;
2638                                 add_credits_and_wake_if(server, credits, 0);
2639                                 break;
2640                         }
2641                         cur_len = (size_t)result;
2642                         iov_iter_advance(from, cur_len);
2643
2644                         nr_pages =
2645                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2646
2647                         wdata = cifs_writedata_direct_alloc(pagevec,
2648                                              cifs_uncached_writev_complete);
2649                         if (!wdata) {
2650                                 rc = -ENOMEM;
2651                                 add_credits_and_wake_if(server, credits, 0);
2652                                 break;
2653                         }
2654
2655
2656                         wdata->page_offset = start;
2657                         wdata->tailsz =
2658                                 nr_pages > 1 ?
2659                                         cur_len - (PAGE_SIZE - start) -
2660                                         (nr_pages - 2) * PAGE_SIZE :
2661                                         cur_len;
2662                 } else {
2663                         nr_pages = get_numpages(wsize, len, &cur_len);
2664                         wdata = cifs_writedata_alloc(nr_pages,
2665                                              cifs_uncached_writev_complete);
2666                         if (!wdata) {
2667                                 rc = -ENOMEM;
2668                                 add_credits_and_wake_if(server, credits, 0);
2669                                 break;
2670                         }
2671
2672                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2673                         if (rc) {
2674                                 kfree(wdata);
2675                                 add_credits_and_wake_if(server, credits, 0);
2676                                 break;
2677                         }
2678
2679                         num_pages = nr_pages;
2680                         rc = wdata_fill_from_iovec(
2681                                 wdata, from, &cur_len, &num_pages);
2682                         if (rc) {
2683                                 for (i = 0; i < nr_pages; i++)
2684                                         put_page(wdata->pages[i]);
2685                                 kfree(wdata);
2686                                 add_credits_and_wake_if(server, credits, 0);
2687                                 break;
2688                         }
2689
2690                         /*
2691                          * Bring nr_pages down to the number of pages we
2692                          * actually used, and free any pages that we didn't use.
2693                          */
2694                         for ( ; nr_pages > num_pages; nr_pages--)
2695                                 put_page(wdata->pages[nr_pages - 1]);
2696
2697                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2698                 }
2699
2700                 wdata->sync_mode = WB_SYNC_ALL;
2701                 wdata->nr_pages = nr_pages;
2702                 wdata->offset = (__u64)offset;
2703                 wdata->cfile = cifsFileInfo_get(open_file);
2704                 wdata->pid = pid;
2705                 wdata->bytes = cur_len;
2706                 wdata->pagesz = PAGE_SIZE;
2707                 wdata->credits = credits;
2708                 wdata->ctx = ctx;
2709                 kref_get(&ctx->refcount);
2710
2711                 if (!wdata->cfile->invalidHandle ||
2712                     !(rc = cifs_reopen_file(wdata->cfile, false)))
2713                         rc = server->ops->async_writev(wdata,
2714                                         cifs_uncached_writedata_release);
2715                 if (rc) {
2716                         add_credits_and_wake_if(server, wdata->credits, 0);
2717                         kref_put(&wdata->refcount,
2718                                  cifs_uncached_writedata_release);
2719                         if (rc == -EAGAIN) {
2720                                 *from = saved_from;
2721                                 iov_iter_advance(from, offset - saved_offset);
2722                                 continue;
2723                         }
2724                         break;
2725                 }
2726
2727                 list_add_tail(&wdata->list, wdata_list);
2728                 offset += cur_len;
2729                 len -= cur_len;
2730         } while (len > 0);
2731
2732         return rc;
2733 }
2734
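/*
 * Collect completions for every write queued on the aio context, resending
 * any chunk that failed with -EAGAIN, then report the total through the
 * iocb or the synchronous completion.
 */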
2735 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2736 {
2737         struct cifs_writedata *wdata, *tmp;
2738         struct cifs_tcon *tcon;
2739         struct cifs_sb_info *cifs_sb;
2740         struct dentry *dentry = ctx->cfile->dentry;
2741         unsigned int i;
2742         int rc;
2743
2744         tcon = tlink_tcon(ctx->cfile->tlink);
2745         cifs_sb = CIFS_SB(dentry->d_sb);
2746
2747         mutex_lock(&ctx->aio_mutex);
2748
2749         if (list_empty(&ctx->list)) {
2750                 mutex_unlock(&ctx->aio_mutex);
2751                 return;
2752         }
2753
2754         rc = ctx->rc;
2755         /*
2756          * Wait for and collect replies for any successful sends in order of
2757          * increasing offset. Once an error is hit, then return without waiting
2758          * for any more replies.
2759          */
2760 restart_loop:
2761         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2762                 if (!rc) {
2763                         if (!try_wait_for_completion(&wdata->done)) {
2764                                 mutex_unlock(&ctx->aio_mutex);
2765                                 return;
2766                         }
2767
2768                         if (wdata->result)
2769                                 rc = wdata->result;
2770                         else
2771                                 ctx->total_len += wdata->bytes;
2772
2773                         /* resend call if it's a retryable error */
2774                         if (rc == -EAGAIN) {
2775                                 struct list_head tmp_list;
2776                                 struct iov_iter tmp_from = ctx->iter;
2777
2778                                 INIT_LIST_HEAD(&tmp_list);
2779                                 list_del_init(&wdata->list);
2780
2781                                 if (ctx->direct_io)
2782                                         rc = cifs_resend_wdata(
2783                                                 wdata, &tmp_list, ctx);
2784                                 else {
2785                                         iov_iter_advance(&tmp_from,
2786                                                  wdata->offset - ctx->pos);
2787
2788                                         rc = cifs_write_from_iter(wdata->offset,
2789                                                 wdata->bytes, &tmp_from,
2790                                                 ctx->cfile, cifs_sb, &tmp_list,
2791                                                 ctx);
2792                                 }
2793
2794                                 list_splice(&tmp_list, &ctx->list);
2795
2796                                 kref_put(&wdata->refcount,
2797                                          cifs_uncached_writedata_release);
2798                                 goto restart_loop;
2799                         }
2800                 }
2801                 list_del_init(&wdata->list);
2802                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2803         }
2804
2805         if (!ctx->direct_io)
2806                 for (i = 0; i < ctx->npages; i++)
2807                         put_page(ctx->bv[i].bv_page);
2808
2809         cifs_stats_bytes_written(tcon, ctx->total_len);
2810         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2811
2812         ctx->rc = (rc == 0) ? ctx->total_len : rc;
2813
2814         mutex_unlock(&ctx->aio_mutex);
2815
2816         if (ctx->iocb && ctx->iocb->ki_complete)
2817                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2818         else
2819                 complete(&ctx->done);
2820 }
2821
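/*
 * Common helper for the uncached and direct write paths: set up a
 * cifs_aio_ctx, split the write into async requests, and then either
 * return -EIOCBQUEUED (async kiocb) or wait for
 * collect_uncached_write_data() to gather the replies and return the
 * number of bytes written.
 */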
2822 static ssize_t __cifs_writev(
2823         struct kiocb *iocb, struct iov_iter *from, bool direct)
2824 {
2825         struct file *file = iocb->ki_filp;
2826         ssize_t total_written = 0;
2827         struct cifsFileInfo *cfile;
2828         struct cifs_tcon *tcon;
2829         struct cifs_sb_info *cifs_sb;
2830         struct cifs_aio_ctx *ctx;
2831         struct iov_iter saved_from = *from;
2832         size_t len = iov_iter_count(from);
2833         int rc;
2834
2835         /*
2836          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
2837          * In that case, fall back to the non-direct write path. This could
2838          * be improved by getting the pages directly in ITER_KVEC.
2839          */
2840         if (direct && from->type & ITER_KVEC) {
2841                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2842                 direct = false;
2843         }
2844
2845         rc = generic_write_checks(iocb, from);
2846         if (rc <= 0)
2847                 return rc;
2848
2849         cifs_sb = CIFS_FILE_SB(file);
2850         cfile = file->private_data;
2851         tcon = tlink_tcon(cfile->tlink);
2852
2853         if (!tcon->ses->server->ops->async_writev)
2854                 return -ENOSYS;
2855
2856         ctx = cifs_aio_ctx_alloc();
2857         if (!ctx)
2858                 return -ENOMEM;
2859
2860         ctx->cfile = cifsFileInfo_get(cfile);
2861
2862         if (!is_sync_kiocb(iocb))
2863                 ctx->iocb = iocb;
2864
2865         ctx->pos = iocb->ki_pos;
2866
2867         if (direct) {
2868                 ctx->direct_io = true;
2869                 ctx->iter = *from;
2870                 ctx->len = len;
2871         } else {
2872                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2873                 if (rc) {
2874                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2875                         return rc;
2876                 }
2877         }
2878
2879         /* grab a lock here because write response handlers can access ctx */
2880         mutex_lock(&ctx->aio_mutex);
2881
2882         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2883                                   cfile, cifs_sb, &ctx->list, ctx);
2884
2885         /*
2886          * If at least one write was successfully sent, then discard any rc
2887          * value from the later writes. If the other writes succeed, then
2888          * we'll end up returning whatever was written. If one fails, then
2889          * we'll get a new rc value from that.
2890          */
2891         if (!list_empty(&ctx->list))
2892                 rc = 0;
2893
2894         mutex_unlock(&ctx->aio_mutex);
2895
2896         if (rc) {
2897                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2898                 return rc;
2899         }
2900
2901         if (!is_sync_kiocb(iocb)) {
2902                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2903                 return -EIOCBQUEUED;
2904         }
2905
2906         rc = wait_for_completion_killable(&ctx->done);
2907         if (rc) {
2908                 mutex_lock(&ctx->aio_mutex);
2909                 ctx->rc = rc = -EINTR;
2910                 total_written = ctx->total_len;
2911                 mutex_unlock(&ctx->aio_mutex);
2912         } else {
2913                 rc = ctx->rc;
2914                 total_written = ctx->total_len;
2915         }
2916
2917         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2918
2919         if (unlikely(!total_written))
2920                 return rc;
2921
2922         iocb->ki_pos += total_written;
2923         return total_written;
2924 }
2925
2926 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
2927 {
2928         return __cifs_writev(iocb, from, true);
2929 }
2930
2931 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2932 {
2933         return __cifs_writev(iocb, from, false);
2934 }
2935
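/*
 * Write through the page cache, failing with -EACCES if a mandatory
 * byte-range lock held through another handle conflicts with the write
 * range. lock_sem is held across the check and the write so the lock
 * list cannot change underneath us.
 */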
2936 static ssize_t
2937 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2938 {
2939         struct file *file = iocb->ki_filp;
2940         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2941         struct inode *inode = file->f_mapping->host;
2942         struct cifsInodeInfo *cinode = CIFS_I(inode);
2943         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2944         ssize_t rc;
2945
2946         inode_lock(inode);
2947         /*
2948          * We need to hold the sem to be sure nobody modifies the lock list
2949          * with a brlock that prevents writing.
2950          */
2951         down_read(&cinode->lock_sem);
2952
2953         rc = generic_write_checks(iocb, from);
2954         if (rc <= 0)
2955                 goto out;
2956
2957         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2958                                      server->vals->exclusive_lock_type, 0,
2959                                      NULL, CIFS_WRITE_OP))
2960                 rc = __generic_file_write_iter(iocb, from);
2961         else
2962                 rc = -EACCES;
2963 out:
2964         up_read(&cinode->lock_sem);
2965         inode_unlock(inode);
2966
2967         if (rc > 0)
2968                 rc = generic_write_sync(iocb, rc);
2969         return rc;
2970 }
2971
2972 ssize_t
2973 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2974 {
2975         struct inode *inode = file_inode(iocb->ki_filp);
2976         struct cifsInodeInfo *cinode = CIFS_I(inode);
2977         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2978         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2979                                                 iocb->ki_filp->private_data;
2980         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2981         ssize_t written;
2982
2983         written = cifs_get_writer(cinode);
2984         if (written)
2985                 return written;
2986
2987         if (CIFS_CACHE_WRITE(cinode)) {
2988                 if (cap_unix(tcon->ses) &&
2989                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2990                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2991                         written = generic_file_write_iter(iocb, from);
2992                         goto out;
2993                 }
2994                 written = cifs_writev(iocb, from);
2995                 goto out;
2996         }
2997         /*
2998          * For non-oplocked files in strict cache mode we need to write the data
2999          * to the server exactly from pos to pos+len-1 rather than flush all
3000          * affected pages, because flushing may cause an error with mandatory
3001          * locks on those pages but not on the region from pos to pos+len-1.
3002          */
3003         written = cifs_user_writev(iocb, from);
3004         if (written > 0 && CIFS_CACHE_READ(cinode)) {
3005                 /*
3006                  * A Windows 7 server can delay breaking a level2 oplock when
3007                  * a write request comes in - break the oplock on the client
3008                  * to prevent reading stale data.
3009                  */
3010                 cifs_zap_mapping(inode);
3011                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
3012                          inode);
3013                 cinode->oplock = 0;
3014         }
3015 out:
3016         cifs_put_writer(cinode);
3017         return written;
3018 }
3019
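/*
 * Wrap a caller-supplied page array in a new cifs_readdata;
 * cifs_readdata_alloc() below is the variant that also allocates the
 * page array itself.
 */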
3020 static struct cifs_readdata *
3021 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3022 {
3023         struct cifs_readdata *rdata;
3024
3025         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3026         if (rdata != NULL) {
3027                 rdata->pages = pages;
3028                 kref_init(&rdata->refcount);
3029                 INIT_LIST_HEAD(&rdata->list);
3030                 init_completion(&rdata->done);
3031                 INIT_WORK(&rdata->work, complete);
3032         }
3033
3034         return rdata;
3035 }
3036
3037 static struct cifs_readdata *
3038 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3039 {
3040         struct page **pages =
3041                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3042         struct cifs_readdata *ret = NULL;
3043
3044         if (pages) {
3045                 ret = cifs_readdata_direct_alloc(pages, complete);
3046                 if (!ret)
3047                         kfree(pages);
3048         }
3049
3050         return ret;
3051 }
3052
3053 void
3054 cifs_readdata_release(struct kref *refcount)
3055 {
3056         struct cifs_readdata *rdata = container_of(refcount,
3057                                         struct cifs_readdata, refcount);
3058 #ifdef CONFIG_CIFS_SMB_DIRECT
3059         if (rdata->mr) {
3060                 smbd_deregister_mr(rdata->mr);
3061                 rdata->mr = NULL;
3062         }
3063 #endif
3064         if (rdata->cfile)
3065                 cifsFileInfo_put(rdata->cfile);
3066
3067         kvfree(rdata->pages);
3068         kfree(rdata);
3069 }
3070
3071 static int
3072 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3073 {
3074         int rc = 0;
3075         struct page *page;
3076         unsigned int i;
3077
3078         for (i = 0; i < nr_pages; i++) {
3079                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3080                 if (!page) {
3081                         rc = -ENOMEM;
3082                         break;
3083                 }
3084                 rdata->pages[i] = page;
3085         }
3086
3087         if (rc) {
3088                 while (i-- > 0) { /* unwind only pages actually allocated */
3089                         put_page(rdata->pages[i]);
3090                         rdata->pages[i] = NULL;
3091                 }
3092         }
3093         return rc;
3094 }
3095
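/*
 * Release an uncached readdata: drop the aio ctx reference taken when
 * the request was queued, put the pages backing the read, then free the
 * readdata itself via cifs_readdata_release().
 */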
3096 static void
3097 cifs_uncached_readdata_release(struct kref *refcount)
3098 {
3099         struct cifs_readdata *rdata = container_of(refcount,
3100                                         struct cifs_readdata, refcount);
3101         unsigned int i;
3102
3103         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3104         for (i = 0; i < rdata->nr_pages; i++) {
3105                 put_page(rdata->pages[i]);
3106         }
3107         cifs_readdata_release(refcount);
3108 }
3109
3110 /**
3111  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3112  * @rdata:      the readdata response with list of pages holding data
3113  * @iter:       destination for our data
3114  *
3115  * This function copies data from a list of pages in a readdata response into
3116  * an array of iovecs. It will first calculate where the data should go
3117  * based on the info in the readdata and then copy the data into that spot.
3118  */
3119 static int
3120 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3121 {
3122         size_t remaining = rdata->got_bytes;
3123         unsigned int i;
3124
3125         for (i = 0; i < rdata->nr_pages; i++) {
3126                 struct page *page = rdata->pages[i];
3127                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3128                 size_t written;
3129
3130                 if (unlikely(iov_iter_is_pipe(iter))) {
3131                         void *addr = kmap_atomic(page);
3132
3133                         written = copy_to_iter(addr, copy, iter);
3134                         kunmap_atomic(addr);
3135                 } else
3136                         written = copy_page_to_iter(page, 0, copy, iter);
3137                 remaining -= written;
3138                 if (written < copy && iov_iter_count(iter) > 0)
3139                         break;
3140         }
3141         return remaining ? -EFAULT : 0;
3142 }
3143
3144 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3145
3146 static void
3147 cifs_uncached_readv_complete(struct work_struct *work)
3148 {
3149         struct cifs_readdata *rdata = container_of(work,
3150                                                 struct cifs_readdata, work);
3151
3152         complete(&rdata->done);
3153         collect_uncached_read_data(rdata->ctx);
3154         /* the call below can drop the last reference to the aio ctx */
3155         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3156 }
3157
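/*
 * Fill the pages of an uncached readdata with up to @len received bytes:
 * copy them out of @iter when one is supplied, or read them straight
 * from the server socket (with smbdirect the data has already arrived
 * via RDMA). Pages beyond the received length are released immediately.
 */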
3158 static int
3159 uncached_fill_pages(struct TCP_Server_Info *server,
3160                     struct cifs_readdata *rdata, struct iov_iter *iter,
3161                     unsigned int len)
3162 {
3163         int result = 0;
3164         unsigned int i;
3165         unsigned int nr_pages = rdata->nr_pages;
3166         unsigned int page_offset = rdata->page_offset;
3167
3168         rdata->got_bytes = 0;
3169         rdata->tailsz = PAGE_SIZE;
3170         for (i = 0; i < nr_pages; i++) {
3171                 struct page *page = rdata->pages[i];
3172                 size_t n;
3173                 unsigned int segment_size = rdata->pagesz;
3174
3175                 if (i == 0)
3176                         segment_size -= page_offset;
3177                 else
3178                         page_offset = 0;
3179
3181                 if (len <= 0) {
3182                         /* no need to hold page hostage */
3183                         rdata->pages[i] = NULL;
3184                         rdata->nr_pages--;
3185                         put_page(page);
3186                         continue;
3187                 }
3188
3189                 n = len;
3190                 if (len >= segment_size)
3191                         /* enough data to fill the page */
3192                         n = segment_size;
3193                 else
3194                         rdata->tailsz = len;
3195                 len -= n;
3196
3197                 if (iter)
3198                         result = copy_page_from_iter(
3199                                         page, page_offset, n, iter);
3200 #ifdef CONFIG_CIFS_SMB_DIRECT
3201                 else if (rdata->mr)
3202                         result = n;
3203 #endif
3204                 else
3205                         result = cifs_read_page_from_socket(
3206                                         server, page, page_offset, n);
3207                 if (result < 0)
3208                         break;
3209
3210                 rdata->got_bytes += result;
3211         }
3212
3213         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3214                                                 rdata->got_bytes : result;
3215 }
3216
3217 static int
3218 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3219                               struct cifs_readdata *rdata, unsigned int len)
3220 {
3221         return uncached_fill_pages(server, rdata, NULL, len);
3222 }
3223
3224 static int
3225 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3226                               struct cifs_readdata *rdata,
3227                               struct iov_iter *iter)
3228 {
3229         return uncached_fill_pages(server, rdata, iter, iter->count);
3230 }
3231
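/*
 * Resend a failed read as a single request rather than in segments:
 * wait until the server grants enough credits for the whole rdata,
 * reopen the file handle if it has been invalidated, and reissue the
 * async read.
 */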
3232 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3233                         struct list_head *rdata_list,
3234                         struct cifs_aio_ctx *ctx)
3235 {
3236         unsigned int rsize, credits;
3237         int rc;
3238         struct TCP_Server_Info *server =
3239                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3240
3241         /*
3242          * Wait for enough credits to resend this rdata.
3243          * Note: we attempt to resend the whole rdata at once, not in segments.
3244          */
3245         do {
3246                 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3247                                                 &rsize, &credits);
3248
3249                 if (rc)
3250                         goto out;
3251
3252                 if (rsize < rdata->bytes) {
3253                         add_credits_and_wake_if(server, credits, 0);
3254                         msleep(1000);
3255                 }
3256         } while (rsize < rdata->bytes);
3257
3258         rc = -EAGAIN;
3259         while (rc == -EAGAIN) {
3260                 rc = 0;
3261                 if (rdata->cfile->invalidHandle)
3262                         rc = cifs_reopen_file(rdata->cfile, true);
3263                 if (!rc)
3264                         rc = server->ops->async_readv(rdata);
3265         }
3266
3267         if (!rc) {
3268                 /* Add to aio pending list */
3269                 list_add_tail(&rdata->list, rdata_list);
3270                 return 0;
3271         }
3272
3273         add_credits_and_wake_if(server, rdata->credits, 0);
3274 out:
3275         kref_put(&rdata->refcount,
3276                 cifs_uncached_readdata_release);
3277
3278         return rc;
3279 }
3280
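/*
 * Split an uncached or direct read of @len bytes at @offset into
 * rsize-bounded async read requests and queue them on @rdata_list. For
 * direct I/O the user pages are pinned with iov_iter_get_pages_alloc();
 * otherwise pages are allocated here and copied to the user buffer
 * later by cifs_readdata_to_iov().
 */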
3281 static int
3282 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3283                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3284                      struct cifs_aio_ctx *ctx)
3285 {
3286         struct cifs_readdata *rdata;
3287         unsigned int npages, rsize, credits;
3288         size_t cur_len;
3289         int rc;
3290         pid_t pid;
3291         struct TCP_Server_Info *server;
3292         struct page **pagevec;
3293         size_t start;
3294         struct iov_iter direct_iov = ctx->iter;
3295
3296         server = tlink_tcon(open_file->tlink)->ses->server;
3297
3298         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3299                 pid = open_file->pid;
3300         else
3301                 pid = current->tgid;
3302
3303         if (ctx->direct_io)
3304                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3305
3306         do {
3307                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3308                                                    &rsize, &credits);
3309                 if (rc)
3310                         break;
3311
3312                 cur_len = min_t(const size_t, len, rsize);
3313
3314                 if (ctx->direct_io) {
3315                         ssize_t result;
3316
3317                         result = iov_iter_get_pages_alloc(
3318                                         &direct_iov, &pagevec,
3319                                         cur_len, &start);
3320                         if (result < 0) {
3321                                 cifs_dbg(VFS,
3322                                         "couldn't get user pages (rc=%zd)"
3323                                         " iter type %d"
3324                                         " iov_offset %zd count %zd\n",
3325                                         result, direct_iov.type,
3326                                         direct_iov.iov_offset,
3327                                         direct_iov.count);
3328                                 dump_stack();
3329
3330                                 rc = result;
3331                                 add_credits_and_wake_if(server, credits, 0);
3332                                 break;
3333                         }
3334                         cur_len = (size_t)result;
3335                         iov_iter_advance(&direct_iov, cur_len);
3336
3337                         rdata = cifs_readdata_direct_alloc(
3338                                         pagevec, cifs_uncached_readv_complete);
3339                         if (!rdata) {
3340                                 add_credits_and_wake_if(server, credits, 0);
3341                                 rc = -ENOMEM;
3342                                 break;
3343                         }
3344
3345                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3346                         rdata->page_offset = start;
3347                         rdata->tailsz = npages > 1 ?
3348                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3349                                 cur_len;
3350
3351                 } else {
3352
3353                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3354                         /* allocate a readdata struct */
3355                         rdata = cifs_readdata_alloc(npages,
3356                                             cifs_uncached_readv_complete);
3357                         if (!rdata) {
3358                                 add_credits_and_wake_if(server, credits, 0);
3359                                 rc = -ENOMEM;
3360                                 break;
3361                         }
3362
3363                         rc = cifs_read_allocate_pages(rdata, npages);
3364                         if (rc)
3365                                 goto error;
3366
3367                         rdata->tailsz = PAGE_SIZE;
3368                 }
3369
3370                 rdata->cfile = cifsFileInfo_get(open_file);
3371                 rdata->nr_pages = npages;
3372                 rdata->offset = offset;
3373                 rdata->bytes = cur_len;
3374                 rdata->pid = pid;
3375                 rdata->pagesz = PAGE_SIZE;
3376                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3377                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3378                 rdata->credits = credits;
3379                 rdata->ctx = ctx;
3380                 kref_get(&ctx->refcount);
3381
3382                 if (!rdata->cfile->invalidHandle ||
3383                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3384                         rc = server->ops->async_readv(rdata);
3385 error:
3386                 if (rc) {
3387                         add_credits_and_wake_if(server, rdata->credits, 0);
3388                         kref_put(&rdata->refcount,
3389                                 cifs_uncached_readdata_release);
3390                         if (rc == -EAGAIN) {
3391                                 iov_iter_revert(&direct_iov, cur_len);
3392                                 continue;
3393                         }
3394                         break;
3395                 }
3396
3397                 list_add_tail(&rdata->list, rdata_list);
3398                 offset += cur_len;
3399                 len -= cur_len;
3400         } while (len > 0);
3401
3402         return rc;
3403 }
3404
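/*
 * Read-side counterpart of collect_uncached_write_data(): gather the
 * completed read replies in order of increasing offset, resend anything
 * that failed with -EAGAIN (keeping whatever partial data was already
 * received), and finally report the total bytes read, or the first
 * error, through ctx->rc.
 */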
3405 static void
3406 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3407 {
3408         struct cifs_readdata *rdata, *tmp;
3409         struct iov_iter *to = &ctx->iter;
3410         struct cifs_sb_info *cifs_sb;
3411         struct cifs_tcon *tcon;
3412         unsigned int i;
3413         int rc;
3414
3415         tcon = tlink_tcon(ctx->cfile->tlink);
3416         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3417
3418         mutex_lock(&ctx->aio_mutex);
3419
3420         if (list_empty(&ctx->list)) {
3421                 mutex_unlock(&ctx->aio_mutex);
3422                 return;
3423         }
3424
3425         rc = ctx->rc;
3426         /* the loop below should proceed in the order of increasing offsets */
3427 again:
3428         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3429                 if (!rc) {
3430                         if (!try_wait_for_completion(&rdata->done)) {
3431                                 mutex_unlock(&ctx->aio_mutex);
3432                                 return;
3433                         }
3434
3435                         if (rdata->result == -EAGAIN) {
3436                                 /* resend call if it's a retryable error */
3437                                 struct list_head tmp_list;
3438                                 unsigned int got_bytes = rdata->got_bytes;
3439
3440                                 list_del_init(&rdata->list);
3441                                 INIT_LIST_HEAD(&tmp_list);
3442
3443                                 /*
3444                                  * We got part of the data and then a
3445                                  * reconnect happened -- fill the buffer and
3446                                  * continue reading.
3447                                  */
3448                                 if (got_bytes && got_bytes < rdata->bytes) {
3449                                         rc = 0;
3450                                         if (!ctx->direct_io)
3451                                                 rc = cifs_readdata_to_iov(rdata, to);
3452                                         if (rc) {
3453                                                 kref_put(&rdata->refcount,
3454                                                         cifs_uncached_readdata_release);
3455                                                 continue;
3456                                         }
3457                                 }
3458
3459                                 if (ctx->direct_io) {
3460                                         /*
3461                                          * Re-use rdata as this is a
3462                                          * direct I/O
3463                                          */
3464                                         rc = cifs_resend_rdata(
3465                                                 rdata,
3466                                                 &tmp_list, ctx);
3467                                 } else {
3468                                         rc = cifs_send_async_read(
3469                                                 rdata->offset + got_bytes,
3470                                                 rdata->bytes - got_bytes,
3471                                                 rdata->cfile, cifs_sb,
3472                                                 &tmp_list, ctx);
3473
3474                                         kref_put(&rdata->refcount,
3475                                                 cifs_uncached_readdata_release);
3476                                 }
3477
3478                                 list_splice(&tmp_list, &ctx->list);
3479
3480                                 goto again;
3481                         } else if (rdata->result)
3482                                 rc = rdata->result;
3483                         else if (!ctx->direct_io)
3484                                 rc = cifs_readdata_to_iov(rdata, to);
3485
3486                         /* if there was a short read -- discard anything left */
3487                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3488                                 rc = -ENODATA;
3489
3490                         ctx->total_len += rdata->got_bytes;
3491                 }
3492                 list_del_init(&rdata->list);
3493                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3494         }
3495
3496         if (!ctx->direct_io) {
3497                 for (i = 0; i < ctx->npages; i++) {
3498                         if (ctx->should_dirty)
3499                                 set_page_dirty(ctx->bv[i].bv_page);
3500                         put_page(ctx->bv[i].bv_page);
3501                 }
3502
3503                 ctx->total_len = ctx->len - iov_iter_count(to);
3504         }
3505
3506         cifs_stats_bytes_read(tcon, ctx->total_len);
3507
3508         /* mask nodata case */
3509         if (rc == -ENODATA)
3510                 rc = 0;
3511
3512         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3513
3514         mutex_unlock(&ctx->aio_mutex);
3515
3516         if (ctx->iocb && ctx->iocb->ki_complete)
3517                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3518         else
3519                 complete(&ctx->done);
3520 }
3521
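/*
 * Common helper for the uncached and direct read paths, mirroring
 * __cifs_writev() above: build an aio ctx, send the async reads, then
 * either return -EIOCBQUEUED or wait for the replies to be collected
 * and return the number of bytes read.
 */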
3522 static ssize_t __cifs_readv(
3523         struct kiocb *iocb, struct iov_iter *to, bool direct)
3524 {
3525         size_t len;
3526         struct file *file = iocb->ki_filp;
3527         struct cifs_sb_info *cifs_sb;
3528         struct cifsFileInfo *cfile;
3529         struct cifs_tcon *tcon;
3530         ssize_t rc, total_read = 0;
3531         loff_t offset = iocb->ki_pos;
3532         struct cifs_aio_ctx *ctx;
3533
3534         /*
3535          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so fall
3536          * back to the data-copy read path. This could be improved by
3537          * getting the pages directly in ITER_KVEC.
3538          */
3539         if (direct && to->type & ITER_KVEC) {
3540                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3541                 direct = false;
3542         }
3543
3544         len = iov_iter_count(to);
3545         if (!len)
3546                 return 0;
3547
3548         cifs_sb = CIFS_FILE_SB(file);
3549         cfile = file->private_data;
3550         tcon = tlink_tcon(cfile->tlink);
3551
3552         if (!tcon->ses->server->ops->async_readv)
3553                 return -ENOSYS;
3554
3555         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3556                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3557
3558         ctx = cifs_aio_ctx_alloc();
3559         if (!ctx)
3560                 return -ENOMEM;
3561
3562         ctx->cfile = cifsFileInfo_get(cfile);
3563
3564         if (!is_sync_kiocb(iocb))
3565                 ctx->iocb = iocb;
3566
3567         if (iter_is_iovec(to))
3568                 ctx->should_dirty = true;
3569
3570         if (direct) {
3571                 ctx->pos = offset;
3572                 ctx->direct_io = true;
3573                 ctx->iter = *to;
3574                 ctx->len = len;
3575         } else {
3576                 rc = setup_aio_ctx_iter(ctx, to, READ);
3577                 if (rc) {
3578                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3579                         return rc;
3580                 }
3581                 len = ctx->len;
3582         }
3583
3584         /* grab a lock here because read response handlers can access ctx */
3585         mutex_lock(&ctx->aio_mutex);
3586
3587         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3588
3589         /* if sending at least one read request succeeded, then reset rc */
3590         if (!list_empty(&ctx->list))
3591                 rc = 0;
3592
3593         mutex_unlock(&ctx->aio_mutex);
3594
3595         if (rc) {
3596                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3597                 return rc;
3598         }
3599
3600         if (!is_sync_kiocb(iocb)) {
3601                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3602                 return -EIOCBQUEUED;
3603         }
3604
3605         rc = wait_for_completion_killable(&ctx->done);
3606         if (rc) {
3607                 mutex_lock(&ctx->aio_mutex);
3608                 ctx->rc = rc = -EINTR;
3609                 total_read = ctx->total_len;
3610                 mutex_unlock(&ctx->aio_mutex);
3611         } else {
3612                 rc = ctx->rc;
3613                 total_read = ctx->total_len;
3614         }
3615
3616         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3617
3618         if (total_read) {
3619                 iocb->ki_pos += total_read;
3620                 return total_read;
3621         }
3622         return rc;
3623 }
3624
3625 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3626 {
3627         return __cifs_readv(iocb, to, true);
3628 }
3629
3630 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3631 {
3632         return __cifs_readv(iocb, to, false);
3633 }
3634
3635 ssize_t
3636 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3637 {
3638         struct inode *inode = file_inode(iocb->ki_filp);
3639         struct cifsInodeInfo *cinode = CIFS_I(inode);
3640         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3641         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3642                                                 iocb->ki_filp->private_data;
3643         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3644         int rc = -EACCES;
3645
3646         /*
3647          * In strict cache mode we need to read from the server all the time
3648          * if we don't have a level II oplock, because the server can delay
3649          * mtime changes - so we can't make a decision about invalidating the
3650          * inode. Page reads can also fail if there are mandatory locks on
3651          * pages affected by this read but not on the region from pos to
3652          * pos+len-1.
3653          */
3654         if (!CIFS_CACHE_READ(cinode))
3655                 return cifs_user_readv(iocb, to);
3656
3657         if (cap_unix(tcon->ses) &&
3658             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3659             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3660                 return generic_file_read_iter(iocb, to);
3661
3662         /*
3663          * We need to hold the sem to be sure nobody modifies the lock list
3664          * with a brlock that prevents reading.
3665          */
3666         down_read(&cinode->lock_sem);
3667         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3668                                      tcon->ses->server->vals->shared_lock_type,
3669                                      0, NULL, CIFS_READ_OP))
3670                 rc = generic_file_read_iter(iocb, to);
3671         up_read(&cinode->lock_sem);
3672         return rc;
3673 }
3674
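/*
 * Synchronous read helper used by cifs_readpage_worker(): issue
 * rsize-bounded sync_read calls (reopening an invalidated handle and
 * retrying on -EAGAIN) until read_size bytes have been read, an error
 * occurs, or a zero-byte read signals EOF.
 */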
3675 static ssize_t
3676 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3677 {
3678         int rc = -EACCES;
3679         unsigned int bytes_read = 0;
3680         unsigned int total_read;
3681         unsigned int current_read_size;
3682         unsigned int rsize;
3683         struct cifs_sb_info *cifs_sb;
3684         struct cifs_tcon *tcon;
3685         struct TCP_Server_Info *server;
3686         unsigned int xid;
3687         char *cur_offset;
3688         struct cifsFileInfo *open_file;
3689         struct cifs_io_parms io_parms;
3690         int buf_type = CIFS_NO_BUFFER;
3691         __u32 pid;
3692
3693         xid = get_xid();
3694         cifs_sb = CIFS_FILE_SB(file);
3695
3696         /* FIXME: set up handlers for larger reads and/or convert to async */
3697         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3698
3699         if (file->private_data == NULL) {
3700                 rc = -EBADF;
3701                 free_xid(xid);
3702                 return rc;
3703         }
3704         open_file = file->private_data;
3705         tcon = tlink_tcon(open_file->tlink);
3706         server = tcon->ses->server;
3707
3708         if (!server->ops->sync_read) {
3709                 free_xid(xid);
3710                 return -ENOSYS;
3711         }
3712
3713         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3714                 pid = open_file->pid;
3715         else
3716                 pid = current->tgid;
3717
3718         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3719                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3720
3721         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3722              total_read += bytes_read, cur_offset += bytes_read) {
3723                 do {
3724                         current_read_size = min_t(uint, read_size - total_read,
3725                                                   rsize);
3726                         /*
3727                          * For Windows ME and 9x we do not want to request
3728                          * more than was negotiated, since the server will
3729                          * refuse the read otherwise.
3730                          */
3731                         if ((tcon->ses) && !(tcon->ses->capabilities &
3732                                 tcon->ses->server->vals->cap_large_files)) {
3733                                 current_read_size = min_t(uint,
3734                                         current_read_size, CIFSMaxBufSize);
3735                         }
3736                         if (open_file->invalidHandle) {
3737                                 rc = cifs_reopen_file(open_file, true);
3738                                 if (rc != 0)
3739                                         break;
3740                         }
3741                         io_parms.pid = pid;
3742                         io_parms.tcon = tcon;
3743                         io_parms.offset = *offset;
3744                         io_parms.length = current_read_size;
3745                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3746                                                     &bytes_read, &cur_offset,
3747                                                     &buf_type);
3748                 } while (rc == -EAGAIN);
3749
3750                 if (rc || (bytes_read == 0)) {
3751                         if (total_read) {
3752                                 break;
3753                         } else {
3754                                 free_xid(xid);
3755                                 return rc;
3756                         }
3757                 } else {
3758                         cifs_stats_bytes_read(tcon, total_read);
3759                         *offset += bytes_read;
3760                 }
3761         }
3762         free_xid(xid);
3763         return total_read;
3764 }
3765
3766 /*
3767  * If the page is mmap'ed into a process' page tables, then we need to make
3768  * sure that it doesn't change while being written back.
3769  */
3770 static vm_fault_t
3771 cifs_page_mkwrite(struct vm_fault *vmf)
3772 {
3773         struct page *page = vmf->page;
3774
3775         lock_page(page);
3776         return VM_FAULT_LOCKED;
3777 }
3778
3779 static const struct vm_operations_struct cifs_file_vm_ops = {
3780         .fault = filemap_fault,
3781         .map_pages = filemap_map_pages,
3782         .page_mkwrite = cifs_page_mkwrite,
3783 };
3784
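/*
 * mmap for files opened with strict cache semantics: if we do not hold
 * a read oplock, zap the mapping first so stale cached pages are not
 * exposed to the new mapping.
 */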
3785 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3786 {
3787         int xid, rc = 0;
3788         struct inode *inode = file_inode(file);
3789
3790         xid = get_xid();
3791
3792         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3793                 rc = cifs_zap_mapping(inode);
3794         if (!rc)
3795                 rc = generic_file_mmap(file, vma);
3796         if (!rc)
3797                 vma->vm_ops = &cifs_file_vm_ops;
3798
3799         free_xid(xid);
3800         return rc;
3801 }
3802
3803 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3804 {
3805         int rc, xid;
3806
3807         xid = get_xid();
3808
3809         rc = cifs_revalidate_file(file);
3810         if (rc)
3811                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3812                          rc);
3813         if (!rc)
3814                 rc = generic_file_mmap(file, vma);
3815         if (!rc)
3816                 vma->vm_ops = &cifs_file_vm_ops;
3817
3818         free_xid(xid);
3819         return rc;
3820 }
3821
3822 static void
3823 cifs_readv_complete(struct work_struct *work)
3824 {
3825         unsigned int i, got_bytes;
3826         struct cifs_readdata *rdata = container_of(work,
3827                                                 struct cifs_readdata, work);
3828
3829         got_bytes = rdata->got_bytes;
3830         for (i = 0; i < rdata->nr_pages; i++) {
3831                 struct page *page = rdata->pages[i];
3832
3833                 lru_cache_add_file(page);
3834
3835                 if (rdata->result == 0 ||
3836                     (rdata->result == -EAGAIN && got_bytes)) {
3837                         flush_dcache_page(page);
3838                         SetPageUptodate(page);
3839                 }
3840
3841                 unlock_page(page);
3842
3843                 if (rdata->result == 0 ||
3844                     (rdata->result == -EAGAIN && got_bytes))
3845                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3846
3847                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3848
3849                 put_page(page);
3850                 rdata->pages[i] = NULL;
3851         }
3852         kref_put(&rdata->refcount, cifs_readdata_release);
3853 }
3854
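/*
 * readpages counterpart of uncached_fill_pages(): fill the page-cache
 * pages of the rdata with received data, zero-filling the tail of a
 * partial page, and release pages that lie beyond the received length
 * or past the server's EOF.
 */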
3855 static int
3856 readpages_fill_pages(struct TCP_Server_Info *server,
3857                      struct cifs_readdata *rdata, struct iov_iter *iter,
3858                      unsigned int len)
3859 {
3860         int result = 0;
3861         unsigned int i;
3862         u64 eof;
3863         pgoff_t eof_index;
3864         unsigned int nr_pages = rdata->nr_pages;
3865         unsigned int page_offset = rdata->page_offset;
3866
3867         /* determine the eof that the server (probably) has */
3868         eof = CIFS_I(rdata->mapping->host)->server_eof;
3869         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3870         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3871
3872         rdata->got_bytes = 0;
3873         rdata->tailsz = PAGE_SIZE;
3874         for (i = 0; i < nr_pages; i++) {
3875                 struct page *page = rdata->pages[i];
3876                 unsigned int to_read = rdata->pagesz;
3877                 size_t n;
3878
3879                 if (i == 0)
3880                         to_read -= page_offset;
3881                 else
3882                         page_offset = 0;
3883
3884                 n = to_read;
3885
3886                 if (len >= to_read) {
3887                         len -= to_read;
3888                 } else if (len > 0) {
3889                         /* enough for partial page, fill and zero the rest */
3890                         zero_user(page, len + page_offset, to_read - len);
3891                         n = rdata->tailsz = len;
3892                         len = 0;
3893                 } else if (page->index > eof_index) {
3894                         /*
3895                          * The VFS will not try to do readahead past the
3896                          * i_size, but it's possible that we have outstanding
3897                          * writes with gaps in the middle and the i_size hasn't
3898                          * caught up yet. Populate those with zeroed out pages
3899                          * to prevent the VFS from repeatedly attempting to
3900                          * fill them until the writes are flushed.
3901                          */
3902                         zero_user(page, 0, PAGE_SIZE);
3903                         lru_cache_add_file(page);
3904                         flush_dcache_page(page);
3905                         SetPageUptodate(page);
3906                         unlock_page(page);
3907                         put_page(page);
3908                         rdata->pages[i] = NULL;
3909                         rdata->nr_pages--;
3910                         continue;
3911                 } else {
3912                         /* no need to hold page hostage */
3913                         lru_cache_add_file(page);
3914                         unlock_page(page);
3915                         put_page(page);
3916                         rdata->pages[i] = NULL;
3917                         rdata->nr_pages--;
3918                         continue;
3919                 }
3920
3921                 if (iter)
3922                         result = copy_page_from_iter(
3923                                         page, page_offset, n, iter);
3924 #ifdef CONFIG_CIFS_SMB_DIRECT
3925                 else if (rdata->mr)
3926                         result = n;
3927 #endif
3928                 else
3929                         result = cifs_read_page_from_socket(
3930                                         server, page, page_offset, n);
3931                 if (result < 0)
3932                         break;
3933
3934                 rdata->got_bytes += result;
3935         }
3936
3937         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3938                                                 rdata->got_bytes : result;
3939 }
3940
3941 static int
3942 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3943                                struct cifs_readdata *rdata, unsigned int len)
3944 {
3945         return readpages_fill_pages(server, rdata, NULL, len);
3946 }
3947
3948 static int
3949 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3950                                struct cifs_readdata *rdata,
3951                                struct iov_iter *iter)
3952 {
3953         return readpages_fill_pages(server, rdata, iter, iter->count);
3954 }
3955
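/*
 * Peel a run of contiguous pages off @page_list, up to @rsize bytes,
 * add them to the page cache, and move them to @tmplist so they can be
 * issued as a single async read; the resulting offset, byte count and
 * page count are returned through the out parameters.
 */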
3956 static int
3957 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3958                     unsigned int rsize, struct list_head *tmplist,
3959                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3960 {
3961         struct page *page, *tpage;
3962         unsigned int expected_index;
3963         int rc;
3964         gfp_t gfp = readahead_gfp_mask(mapping);
3965
3966         INIT_LIST_HEAD(tmplist);
3967
3968         page = lru_to_page(page_list);
3969
3970         /*
3971          * Lock the page and put it in the cache. Since no one else
3972          * should have access to this page, we're safe to simply set
3973          * PG_locked without checking it first.
3974          */
3975         __SetPageLocked(page);
3976         rc = add_to_page_cache_locked(page, mapping,
3977                                       page->index, gfp);
3978
3979         /* give up if we can't stick it in the cache */
3980         if (rc) {
3981                 __ClearPageLocked(page);
3982                 return rc;
3983         }
3984
3985         /* move first page to the tmplist */
3986         *offset = (loff_t)page->index << PAGE_SHIFT;
3987         *bytes = PAGE_SIZE;
3988         *nr_pages = 1;
3989         list_move_tail(&page->lru, tmplist);
3990
3991         /* now try and add more pages onto the request */
3992         expected_index = page->index + 1;
3993         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3994                 /* discontinuity ? */
3995                 if (page->index != expected_index)
3996                         break;
3997
3998                 /* would this page push the read over the rsize? */
3999                 if (*bytes + PAGE_SIZE > rsize)
4000                         break;
4001
4002                 __SetPageLocked(page);
4003                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4004                         __ClearPageLocked(page);
4005                         break;
4006                 }
4007                 list_move_tail(&page->lru, tmplist);
4008                 (*bytes) += PAGE_SIZE;
4009                 expected_index++;
4010                 (*nr_pages)++;
4011         }
4012         return rc;
4013 }
4014
4015 static int cifs_readpages(struct file *file, struct address_space *mapping,
4016         struct list_head *page_list, unsigned num_pages)
4017 {
4018         int rc;
4019         struct list_head tmplist;
4020         struct cifsFileInfo *open_file = file->private_data;
4021         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4022         struct TCP_Server_Info *server;
4023         pid_t pid;
4024         unsigned int xid;
4025
4026         xid = get_xid();
4027         /*
4028          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4029          * immediately if the cookie is negative
4030          *
4031          * After this point, every page in the list might have PG_fscache set,
4032          * so we will need to clean that up off of every page we don't use.
4033          */
4034         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4035                                          &num_pages);
4036         if (rc == 0) {
4037                 free_xid(xid);
4038                 return rc;
4039         }
4040
4041         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4042                 pid = open_file->pid;
4043         else
4044                 pid = current->tgid;
4045
4046         rc = 0;
4047         server = tlink_tcon(open_file->tlink)->ses->server;
4048
4049         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4050                  __func__, file, mapping, num_pages);
4051
4052         /*
4053          * Start with the page at end of list and move it to private
4054          * list. Do the same with any following pages until we hit
4055          * the rsize limit, hit an index discontinuity, or run out of
4056          * pages. Issue the async read and then start the loop again
4057          * until the list is empty.
4058          *
4059          * Note that list order is important. The page_list is in
4060          * the order of declining indexes. When we put the pages in
4061          * the rdata->pages, then we want them in increasing order.
4062          */
4063         while (!list_empty(page_list)) {
4064                 unsigned int i, nr_pages, bytes, rsize;
4065                 loff_t offset;
4066                 struct page *page, *tpage;
4067                 struct cifs_readdata *rdata;
4068                 unsigned credits;
4069
4070                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4071                                                    &rsize, &credits);
4072                 if (rc)
4073                         break;
4074
4075                 /*
4076                  * Give up immediately if rsize is too small to read an entire
4077                  * page. The VFS will fall back to readpage. We should never
4078                  * reach this point however since we set ra_pages to 0 when the
4079                  * rsize is smaller than a cache page.
4080                  */
4081                 if (unlikely(rsize < PAGE_SIZE)) {
4082                         add_credits_and_wake_if(server, credits, 0);
4083                         free_xid(xid);
4084                         return 0;
4085                 }
4086
4087                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4088                                          &nr_pages, &offset, &bytes);
4089                 if (rc) {
4090                         add_credits_and_wake_if(server, credits, 0);
4091                         break;
4092                 }
4093
4094                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4095                 if (!rdata) {
4096                         /* best to give up if we're out of mem */
4097                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4098                                 list_del(&page->lru);
4099                                 lru_cache_add_file(page);
4100                                 unlock_page(page);
4101                                 put_page(page);
4102                         }
4103                         rc = -ENOMEM;
4104                         add_credits_and_wake_if(server, credits, 0);
4105                         break;
4106                 }
4107
4108                 rdata->cfile = cifsFileInfo_get(open_file);
4109                 rdata->mapping = mapping;
4110                 rdata->offset = offset;
4111                 rdata->bytes = bytes;
4112                 rdata->pid = pid;
4113                 rdata->pagesz = PAGE_SIZE;
4114                 rdata->tailsz = PAGE_SIZE;
4115                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4116                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4117                 rdata->credits = credits;
4118
4119                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4120                         list_del(&page->lru);
4121                         rdata->pages[rdata->nr_pages++] = page;
4122                 }
4123
4124                 if (!rdata->cfile->invalidHandle ||
4125                     !(rc = cifs_reopen_file(rdata->cfile, true)))
4126                         rc = server->ops->async_readv(rdata);
4127                 if (rc) {
4128                         add_credits_and_wake_if(server, rdata->credits, 0);
4129                         for (i = 0; i < rdata->nr_pages; i++) {
4130                                 page = rdata->pages[i];
4131                                 lru_cache_add_file(page);
4132                                 unlock_page(page);
4133                                 put_page(page);
4134                         }
4135                         /* Fallback to the readpage in error/reconnect cases */
4136                         kref_put(&rdata->refcount, cifs_readdata_release);
4137                         break;
4138                 }
4139
4140                 kref_put(&rdata->refcount, cifs_readdata_release);
4141         }
4142
4143         /* Any pages that have been shown to fscache but didn't get added to
4144          * the pagecache must be uncached before they get returned to the
4145          * allocator.
4146          */
4147         cifs_fscache_readpages_cancel(mapping->host, page_list);
4148         free_xid(xid);
4149         return rc;
4150 }
4151
4152 /*
4153  * cifs_readpage_worker must be called with the page pinned
4154  */
4155 static int cifs_readpage_worker(struct file *file, struct page *page,
4156         loff_t *poffset)
4157 {
4158         char *read_data;
4159         int rc;
4160
4161         /* Is the page cached? */
4162         rc = cifs_readpage_from_fscache(file_inode(file), page);
4163         if (rc == 0)
4164                 goto read_complete;
4165
4166         read_data = kmap(page);
4167         /* for reads over a certain size could initiate async read ahead */
4168         /* for reads over a certain size we could initiate async read-ahead */
4169         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4170
4171         if (rc < 0)
4172                 goto io_error;
4173         else
4174                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4175
4176         /* we do not want atime to be less than mtime, since that broke some apps */
4177         file_inode(file)->i_atime = current_time(file_inode(file));
4178         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4179                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4180         else
4181                 file_inode(file)->i_atime = current_time(file_inode(file));
4182
4183         if (PAGE_SIZE > rc)
4184                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4185
4186         flush_dcache_page(page);
4187         SetPageUptodate(page);
4188
4189         /* send this page to the cache */
4190         cifs_readpage_to_fscache(file_inode(file), page);
4191
4192         rc = 0;
4193
4194 io_error:
4195         kunmap(page);
4196         unlock_page(page);
4197
4198 read_complete:
4199         return rc;
4200 }
4201
4202 static int cifs_readpage(struct file *file, struct page *page)
4203 {
4204         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4205         int rc = -EACCES;
4206         unsigned int xid;
4207
4208         xid = get_xid();
4209
4210         if (file->private_data == NULL) {
4211                 rc = -EBADF;
4212                 free_xid(xid);
4213                 return rc;
4214         }
4215
4216         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4217                  page, (int)offset, (int)offset);
4218
4219         rc = cifs_readpage_worker(file, page, &offset);
4220
4221         free_xid(xid);
4222         return rc;
4223 }
4224
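/* return 1 if any open handle on the inode has write access, else 0 */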
4225 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4226 {
4227         struct cifsFileInfo *open_file;
4228         struct cifs_tcon *tcon =
4229                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4230
4231         spin_lock(&tcon->open_file_lock);
4232         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4233                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4234                         spin_unlock(&tcon->open_file_lock);
4235                         return 1;
4236                 }
4237         }
4238         spin_unlock(&tcon->open_file_lock);
4239         return 0;
4240 }
4241
4242 /*
4243  * We do not want to update the file size from the server for inodes open
4244  * for write, to avoid races with writepage extending the file. In the future
4245  * we could allow refreshing the inode only on increases in the file size,
4246  * but this is tricky to do without racing with writebehind page caching.
4247  */
4248 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4249 {
4250         if (!cifsInode)
4251                 return true;
4252
4253         if (is_inode_writable(cifsInode)) {
4254                 /* This inode is open for write at least once */
4255                 struct cifs_sb_info *cifs_sb;
4256
4257                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4258                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4259                         /* since there is no page cache to corrupt on
4260                            directio we can change size safely */
4261                         return true;
4262                 }
4263
4264                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4265                         return true;
4266
4267                 return false;
4268         } else
4269                 return true;
4270 }
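
/*
 * Hypothetical caller sketch (an assumption, not from this file): how an
 * attribute-refresh path would consult is_size_safe_to_change() before
 * trusting a server-reported end-of-file value. 'server_eof' stands in for
 * a size parsed from a query-info response; the real callers live in
 * inode.c and take the appropriate locks around i_size_write().
 */
static void example_apply_server_eof(struct inode *inode, __u64 server_eof)
{
	if (is_size_safe_to_change(CIFS_I(inode), server_eof))
		i_size_write(inode, server_eof);
	/* otherwise keep the locally cached size; writeback owns it */
}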
4271
4272 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4273                         loff_t pos, unsigned len, unsigned flags,
4274                         struct page **pagep, void **fsdata)
4275 {
4276         int oncethru = 0;
4277         pgoff_t index = pos >> PAGE_SHIFT;
4278         loff_t offset = pos & (PAGE_SIZE - 1);
4279         loff_t page_start = pos & PAGE_MASK;
4280         loff_t i_size;
4281         struct page *page;
4282         int rc = 0;
4283
4284         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4285
4286 start:
4287         page = grab_cache_page_write_begin(mapping, index, flags);
4288         if (!page) {
4289                 rc = -ENOMEM;
4290                 goto out;
4291         }
4292
4293         if (PageUptodate(page))
4294                 goto out;
4295
4296         /*
4297          * If we write a full page it will be up to date, no need to read from
4298          * the server. If the write is short, we'll end up doing a sync write
4299          * instead.
4300          */
4301         if (len == PAGE_SIZE)
4302                 goto out;
4303
4304         /*
4305          * optimize away the read when we have an oplock, and we're not
4306          * expecting to use any of the data we'd be reading in. That
4307          * is, when the page lies beyond the EOF, or straddles the EOF
4308          * and the write will cover all of the existing data.
4309          */
4310         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4311                 i_size = i_size_read(mapping->host);
4312                 if (page_start >= i_size ||
4313                     (offset == 0 && (pos + len) >= i_size)) {
4314                         zero_user_segments(page, 0, offset,
4315                                            offset + len,
4316                                            PAGE_SIZE);
4317                         /*
4318                          * PageChecked means that the parts of the page
4319                          * to which we're not writing are considered up
4320                          * to date. Once the data is copied to the
4321                          * page, it can be set uptodate.
4322                          */
4323                         SetPageChecked(page);
4324                         goto out;
4325                 }
4326         }
4327
4328         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4329                 /*
4330                  * might as well read a page, it is fast enough. If we get
4331                  * an error, we don't need to return it. cifs_write_end will
4332                  * do a sync write instead since PG_uptodate isn't set.
4333                  */
4334                 cifs_readpage_worker(file, page, &page_start);
4335                 put_page(page);
4336                 oncethru = 1;
4337                 goto start;
4338         } else {
4339                 /* we could try using another file handle if there is
4340                    one - but how would we lock it to prevent a close of
4341                    that handle racing with this read? In any case the
4342                    page will be written out by write_end so this is fine */
4343         }
4344 out:
4345         *pagep = page;
4346         return rc;
4347 }
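
/*
 * Minimal sketch (illustrative, not part of the original file) of the
 * read-avoidance test in cifs_write_begin() above: with a read oplock, the
 * page need not be read from the server when it starts at or beyond EOF,
 * or when the write begins at offset 0 and reaches EOF, because none of
 * the existing data in the page will survive the write.
 */
static bool example_can_skip_read(loff_t pos, unsigned int len, loff_t i_size)
{
	loff_t page_start = pos & PAGE_MASK;
	loff_t offset = pos & (PAGE_SIZE - 1);

	return page_start >= i_size ||
	       (offset == 0 && pos + len >= i_size);
}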
4348
4349 static int cifs_release_page(struct page *page, gfp_t gfp)
4350 {
4351         if (PagePrivate(page))
4352                 return 0;
4353
4354         return cifs_fscache_release_page(page, gfp);
4355 }
4356
4357 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4358                                  unsigned int length)
4359 {
4360         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4361
4362         if (offset == 0 && length == PAGE_SIZE)
4363                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4364 }
4365
4366 static int cifs_launder_page(struct page *page)
4367 {
4368         int rc = 0;
4369         loff_t range_start = page_offset(page);
4370         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4371         struct writeback_control wbc = {
4372                 .sync_mode = WB_SYNC_ALL,
4373                 .nr_to_write = 0,
4374                 .range_start = range_start,
4375                 .range_end = range_end,
4376         };
4377
4378         cifs_dbg(FYI, "Launder page: %p\n", page);
4379
4380         if (clear_page_dirty_for_io(page))
4381                 rc = cifs_writepage_locked(page, &wbc);
4382
4383         cifs_fscache_invalidate_page(page, page->mapping->host);
4384         return rc;
4385 }
4386
4387 void cifs_oplock_break(struct work_struct *work)
4388 {
4389         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4390                                                   oplock_break);
4391         struct inode *inode = d_inode(cfile->dentry);
4392         struct cifsInodeInfo *cinode = CIFS_I(inode);
4393         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4394         struct TCP_Server_Info *server = tcon->ses->server;
4395         int rc = 0;
4396
4397         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4398                         TASK_UNINTERRUPTIBLE);
4399
4400         server->ops->downgrade_oplock(server, cinode,
4401                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4402
4403         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4404                                                 cifs_has_mand_locks(cinode)) {
4405                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4406                          inode);
4407                 cinode->oplock = 0;
4408         }
4409
4410         if (inode && S_ISREG(inode->i_mode)) {
4411                 if (CIFS_CACHE_READ(cinode))
4412                         break_lease(inode, O_RDONLY);
4413                 else
4414                         break_lease(inode, O_WRONLY);
4415                 rc = filemap_fdatawrite(inode->i_mapping);
4416                 if (!CIFS_CACHE_READ(cinode)) {
4417                         rc = filemap_fdatawait(inode->i_mapping);
4418                         mapping_set_error(inode->i_mapping, rc);
4419                         cifs_zap_mapping(inode);
4420                 }
4421                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4422         }
4423
4424         rc = cifs_push_locks(cfile);
4425         if (rc)
4426                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4427
4428         /*
4429          * Releasing a stale oplock after a recent reconnect of the SMB
4430          * session using a now incorrect file handle is not a data integrity
4431          * issue, but do not bother sending an oplock release if the session
4432          * is still disconnected, since the server has already released it.
4433          */
4434         if (!cfile->oplock_break_cancelled) {
4435                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4436                                                              cinode);
4437                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4438         }
4439         cifs_done_oplock_break(cinode);
4440 }
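
/*
 * Sketch (illustrative, with an assumed helper name) of the cache-level
 * decision cifs_oplock_break() makes above: losing write caching forces
 * dirty pages out to the server; additionally losing read caching means
 * the cached pages can no longer be trusted and the mapping is zapped.
 */
static int example_flush_for_oplock_break(struct inode *inode,
					  bool keep_read_cache)
{
	int rc = filemap_fdatawrite(inode->i_mapping);

	if (!keep_read_cache) {
		rc = filemap_fdatawait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);
		cifs_zap_mapping(inode);
	}
	return rc;
}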
4441
4442 /*
4443  * The presence of cifs_direct_io() in the address space ops vector
4444  * allows open() with the O_DIRECT flag, which would have failed otherwise.
4445  *
4446  * In the non-cached mode (mount with cache=none), we shunt off direct
4447  * read and write requests, so this method should never be called.
4448  *
4449  * Direct IO is not yet supported in the cached mode.
4450  */
4451 static ssize_t
4452 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4453 {
4454         /*
4455          * FIXME
4456          * Eventually need to support direct IO for non forcedirectio mounts
4457          */
4458         return -EINVAL;
4459 }
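
/*
 * For context, a condensed sketch (quoted approximately from memory of
 * do_dentry_open() in fs/open.c) of the VFS-side check that makes the
 * stub above necessary: O_DIRECT opens are rejected unless the mapping
 * provides a ->direct_IO method.
 */
#if 0	/* not compiled here; see do_dentry_open() in fs/open.c */
	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}
#endif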
4460
4461
4462 const struct address_space_operations cifs_addr_ops = {
4463         .readpage = cifs_readpage,
4464         .readpages = cifs_readpages,
4465         .writepage = cifs_writepage,
4466         .writepages = cifs_writepages,
4467         .write_begin = cifs_write_begin,
4468         .write_end = cifs_write_end,
4469         .set_page_dirty = __set_page_dirty_nobuffers,
4470         .releasepage = cifs_release_page,
4471         .direct_IO = cifs_direct_io,
4472         .invalidatepage = cifs_invalidate_page,
4473         .launder_page = cifs_launder_page,
4474 };
4475
4476 /*
4477  * cifs_readpages requires the server to support a buffer large enough to
4478  * contain the header plus one complete page of data.  Otherwise, we need
4479  * to leave cifs_readpages out of the address space operations.
4480  */
4481 const struct address_space_operations cifs_addr_ops_smallbuf = {
4482         .readpage = cifs_readpage,
4483         .writepage = cifs_writepage,
4484         .writepages = cifs_writepages,
4485         .write_begin = cifs_write_begin,
4486         .write_end = cifs_write_end,
4487         .set_page_dirty = __set_page_dirty_nobuffers,
4488         .releasepage = cifs_release_page,
4489         .invalidatepage = cifs_invalidate_page,
4490         .launder_page = cifs_launder_page,
4491 };
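
/*
 * Sketch (illustrative; the real selection logic lives in cifs_set_ops()
 * in inode.c) of how an inode would be wired to one of the two tables
 * above: servers whose negotiated buffer cannot hold a header plus a full
 * page of data get the small-buffer table, which omits ->readpages.
 */
static void example_pick_aops(struct inode *inode, unsigned int max_buf)
{
	if (max_buf < PAGE_SIZE + MAX_CIFS_HDR_SIZE)
		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
	else
		inode->i_data.a_ops = &cifs_addr_ops;
}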