cifs: convert oplock breaks to use slow_work facility (try #4)
[safe/jmp/linux-2.6] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2007
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <asm/div64.h>
35 #include "cifsfs.h"
36 #include "cifspdu.h"
37 #include "cifsglob.h"
38 #include "cifsproto.h"
39 #include "cifs_unicode.h"
40 #include "cifs_debug.h"
41 #include "cifs_fs_sb.h"
42
43 static inline struct cifsFileInfo *cifs_init_private(
44         struct cifsFileInfo *private_data, struct inode *inode,
45         struct file *file, __u16 netfid)
46 {
47         memset(private_data, 0, sizeof(struct cifsFileInfo));
48         private_data->netfid = netfid;
49         private_data->pid = current->tgid;
50         mutex_init(&private_data->fh_mutex);
51         mutex_init(&private_data->lock_mutex);
52         INIT_LIST_HEAD(&private_data->llist);
53         private_data->pfile = file; /* needed for writepage */
54         private_data->pInode = igrab(inode);
55         private_data->mnt = file->f_path.mnt;
56         private_data->invalidHandle = false;
57         private_data->closePend = false;
58         /* Initialize reference count to one.  The private data is
59         freed on the release of the last reference */
60         atomic_set(&private_data->count, 1);
61         slow_work_init(&private_data->oplock_break, &cifs_oplock_break_ops);
62
63         return private_data;
64 }
65
66 static inline int cifs_convert_flags(unsigned int flags)
67 {
68         if ((flags & O_ACCMODE) == O_RDONLY)
69                 return GENERIC_READ;
70         else if ((flags & O_ACCMODE) == O_WRONLY)
71                 return GENERIC_WRITE;
72         else if ((flags & O_ACCMODE) == O_RDWR) {
73                 /* GENERIC_ALL is too much permission to request
74                    can cause unnecessary access denied on create */
75                 /* return GENERIC_ALL; */
76                 return (GENERIC_READ | GENERIC_WRITE);
77         }
78
79         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
80                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
81                 FILE_READ_DATA);
82 }
83
84 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
85 {
86         fmode_t posix_flags = 0;
87
88         if ((flags & O_ACCMODE) == O_RDONLY)
89                 posix_flags = FMODE_READ;
90         else if ((flags & O_ACCMODE) == O_WRONLY)
91                 posix_flags = FMODE_WRITE;
92         else if ((flags & O_ACCMODE) == O_RDWR) {
93                 /* GENERIC_ALL is too much permission to request
94                    can cause unnecessary access denied on create */
95                 /* return GENERIC_ALL; */
96                 posix_flags = FMODE_READ | FMODE_WRITE;
97         }
98         /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
99            reopening a file.  They had their effect on the original open */
100         if (flags & O_APPEND)
101                 posix_flags |= (fmode_t)O_APPEND;
102         if (flags & O_SYNC)
103                 posix_flags |= (fmode_t)O_SYNC;
104         if (flags & O_DIRECTORY)
105                 posix_flags |= (fmode_t)O_DIRECTORY;
106         if (flags & O_NOFOLLOW)
107                 posix_flags |= (fmode_t)O_NOFOLLOW;
108         if (flags & O_DIRECT)
109                 posix_flags |= (fmode_t)O_DIRECT;
110
111         return posix_flags;
112 }
113
114 static inline int cifs_get_disposition(unsigned int flags)
115 {
116         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
117                 return FILE_CREATE;
118         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
119                 return FILE_OVERWRITE_IF;
120         else if ((flags & O_CREAT) == O_CREAT)
121                 return FILE_OPEN_IF;
122         else if ((flags & O_TRUNC) == O_TRUNC)
123                 return FILE_OVERWRITE;
124         else
125                 return FILE_OPEN;
126 }
127
128 /* all arguments to this function must be checked for validity in caller */
129 static inline int
130 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
131                              struct cifsInodeInfo *pCifsInode,
132                              struct cifsFileInfo *pCifsFile, __u32 oplock,
133                              u16 netfid)
134 {
135
136         write_lock(&GlobalSMBSeslock);
137
138         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
139         if (pCifsInode == NULL) {
140                 write_unlock(&GlobalSMBSeslock);
141                 return -EINVAL;
142         }
143
144         if (pCifsInode->clientCanCacheRead) {
145                 /* we have the inode open somewhere else
146                    no need to discard cache data */
147                 goto psx_client_can_cache;
148         }
149
150         /* BB FIXME need to fix this check to move it earlier into posix_open
151            BB  fIX following section BB FIXME */
152
153         /* if not oplocked, invalidate inode pages if mtime or file
154            size changed */
155 /*      temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
156         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
157                            (file->f_path.dentry->d_inode->i_size ==
158                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
159                 cFYI(1, ("inode unchanged on server"));
160         } else {
161                 if (file->f_path.dentry->d_inode->i_mapping) {
162                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
163                         if (rc != 0)
164                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
165                 }
166                 cFYI(1, ("invalidating remote inode since open detected it "
167                          "changed"));
168                 invalidate_remote_inode(file->f_path.dentry->d_inode);
169         } */
170
171 psx_client_can_cache:
172         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
173                 pCifsInode->clientCanCacheAll = true;
174                 pCifsInode->clientCanCacheRead = true;
175                 cFYI(1, ("Exclusive Oplock granted on inode %p",
176                          file->f_path.dentry->d_inode));
177         } else if ((oplock & 0xF) == OPLOCK_READ)
178                 pCifsInode->clientCanCacheRead = true;
179
180         /* will have to change the unlock if we reenable the
181            filemap_fdatawrite (which does not seem necessary */
182         write_unlock(&GlobalSMBSeslock);
183         return 0;
184 }
185
186 static struct cifsFileInfo *
187 cifs_fill_filedata(struct file *file)
188 {
189         struct list_head *tmp;
190         struct cifsFileInfo *pCifsFile = NULL;
191         struct cifsInodeInfo *pCifsInode = NULL;
192
193         /* search inode for this file and fill in file->private_data */
194         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
195         read_lock(&GlobalSMBSeslock);
196         list_for_each(tmp, &pCifsInode->openFileList) {
197                 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
198                 if ((pCifsFile->pfile == NULL) &&
199                     (pCifsFile->pid == current->tgid)) {
200                         /* mode set in cifs_create */
201
202                         /* needed for writepage */
203                         pCifsFile->pfile = file;
204                         file->private_data = pCifsFile;
205                         break;
206                 }
207         }
208         read_unlock(&GlobalSMBSeslock);
209
210         if (file->private_data != NULL) {
211                 return pCifsFile;
212         } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
213                         cERROR(1, ("could not find file instance for "
214                                    "new file %p", file));
215         return NULL;
216 }
217
218 /* all arguments to this function must be checked for validity in caller */
219 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
220         struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
221         struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
222         char *full_path, int xid)
223 {
224         struct timespec temp;
225         int rc;
226
227         /* want handles we can use to read with first
228            in the list so we do not have to walk the
229            list to search for one in write_begin */
230         if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
231                 list_add_tail(&pCifsFile->flist,
232                               &pCifsInode->openFileList);
233         } else {
234                 list_add(&pCifsFile->flist,
235                          &pCifsInode->openFileList);
236         }
237         write_unlock(&GlobalSMBSeslock);
238         if (pCifsInode->clientCanCacheRead) {
239                 /* we have the inode open somewhere else
240                    no need to discard cache data */
241                 goto client_can_cache;
242         }
243
244         /* BB need same check in cifs_create too? */
245         /* if not oplocked, invalidate inode pages if mtime or file
246            size changed */
247         temp = cifs_NTtimeToUnix(buf->LastWriteTime);
248         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
249                            (file->f_path.dentry->d_inode->i_size ==
250                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
251                 cFYI(1, ("inode unchanged on server"));
252         } else {
253                 if (file->f_path.dentry->d_inode->i_mapping) {
254                 /* BB no need to lock inode until after invalidate
255                    since namei code should already have it locked? */
256                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
257                         if (rc != 0)
258                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
259                 }
260                 cFYI(1, ("invalidating remote inode since open detected it "
261                          "changed"));
262                 invalidate_remote_inode(file->f_path.dentry->d_inode);
263         }
264
265 client_can_cache:
266         if (pTcon->unix_ext)
267                 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
268                         full_path, inode->i_sb, xid);
269         else
270                 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
271                         full_path, buf, inode->i_sb, xid, NULL);
272
273         if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
274                 pCifsInode->clientCanCacheAll = true;
275                 pCifsInode->clientCanCacheRead = true;
276                 cFYI(1, ("Exclusive Oplock granted on inode %p",
277                          file->f_path.dentry->d_inode));
278         } else if ((*oplock & 0xF) == OPLOCK_READ)
279                 pCifsInode->clientCanCacheRead = true;
280
281         return rc;
282 }
283
284 int cifs_open(struct inode *inode, struct file *file)
285 {
286         int rc = -EACCES;
287         int xid;
288         __u32 oplock;
289         struct cifs_sb_info *cifs_sb;
290         struct cifsTconInfo *tcon;
291         struct cifsFileInfo *pCifsFile;
292         struct cifsInodeInfo *pCifsInode;
293         char *full_path = NULL;
294         int desiredAccess;
295         int disposition;
296         __u16 netfid;
297         FILE_ALL_INFO *buf = NULL;
298
299         xid = GetXid();
300
301         cifs_sb = CIFS_SB(inode->i_sb);
302         tcon = cifs_sb->tcon;
303
304         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
305         pCifsFile = cifs_fill_filedata(file);
306         if (pCifsFile) {
307                 rc = 0;
308                 FreeXid(xid);
309                 return rc;
310         }
311
312         full_path = build_path_from_dentry(file->f_path.dentry);
313         if (full_path == NULL) {
314                 rc = -ENOMEM;
315                 FreeXid(xid);
316                 return rc;
317         }
318
319         cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
320                  inode, file->f_flags, full_path));
321
322         if (oplockEnabled)
323                 oplock = REQ_OPLOCK;
324         else
325                 oplock = 0;
326
327         if (!tcon->broken_posix_open && tcon->unix_ext &&
328             (tcon->ses->capabilities & CAP_UNIX) &&
329             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
330                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
331                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
332                 /* can not refresh inode info since size could be stale */
333                 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
334                                      cifs_sb->mnt_file_mode /* ignored */,
335                                      oflags, &oplock, &netfid, xid);
336                 if (rc == 0) {
337                         cFYI(1, ("posix open succeeded"));
338                         /* no need for special case handling of setting mode
339                            on read only files needed here */
340
341                         pCifsFile = cifs_fill_filedata(file);
342                         cifs_posix_open_inode_helper(inode, file, pCifsInode,
343                                                      pCifsFile, oplock, netfid);
344                         goto out;
345                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
346                         if (tcon->ses->serverNOS)
347                                 cERROR(1, ("server %s of type %s returned"
348                                            " unexpected error on SMB posix open"
349                                            ", disabling posix open support."
350                                            " Check if server update available.",
351                                            tcon->ses->serverName,
352                                            tcon->ses->serverNOS));
353                         tcon->broken_posix_open = true;
354                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
355                          (rc != -EOPNOTSUPP)) /* path not found or net err */
356                         goto out;
357                 /* else fallthrough to retry open the old way on network i/o
358                    or DFS errors */
359         }
360
361         desiredAccess = cifs_convert_flags(file->f_flags);
362
363 /*********************************************************************
364  *  open flag mapping table:
365  *
366  *      POSIX Flag            CIFS Disposition
367  *      ----------            ----------------
368  *      O_CREAT               FILE_OPEN_IF
369  *      O_CREAT | O_EXCL      FILE_CREATE
370  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
371  *      O_TRUNC               FILE_OVERWRITE
372  *      none of the above     FILE_OPEN
373  *
374  *      Note that there is not a direct match between disposition
375  *      FILE_SUPERSEDE (ie create whether or not file exists although
376  *      O_CREAT | O_TRUNC is similar but truncates the existing
377  *      file rather than creating a new file as FILE_SUPERSEDE does
378  *      (which uses the attributes / metadata passed in on open call)
379  *?
380  *?  O_SYNC is a reasonable match to CIFS writethrough flag
381  *?  and the read write flags match reasonably.  O_LARGEFILE
382  *?  is irrelevant because largefile support is always used
383  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
384  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
385  *********************************************************************/
386
387         disposition = cifs_get_disposition(file->f_flags);
388
389         /* BB pass O_SYNC flag through on file attributes .. BB */
390
391         /* Also refresh inode by passing in file_info buf returned by SMBOpen
392            and calling get_inode_info with returned buf (at least helps
393            non-Unix server case) */
394
395         /* BB we can not do this if this is the second open of a file
396            and the first handle has writebehind data, we might be
397            able to simply do a filemap_fdatawrite/filemap_fdatawait first */
398         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
399         if (!buf) {
400                 rc = -ENOMEM;
401                 goto out;
402         }
403
404         if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
405                 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
406                          desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
407                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
408                                  & CIFS_MOUNT_MAP_SPECIAL_CHR);
409         else
410                 rc = -EIO; /* no NT SMB support fall into legacy open below */
411
412         if (rc == -EIO) {
413                 /* Old server, try legacy style OpenX */
414                 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
415                         desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
416                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
417                                 & CIFS_MOUNT_MAP_SPECIAL_CHR);
418         }
419         if (rc) {
420                 cFYI(1, ("cifs_open returned 0x%x", rc));
421                 goto out;
422         }
423         file->private_data =
424                 kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
425         if (file->private_data == NULL) {
426                 rc = -ENOMEM;
427                 goto out;
428         }
429         pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
430         write_lock(&GlobalSMBSeslock);
431         list_add(&pCifsFile->tlist, &tcon->openFileList);
432
433         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
434         if (pCifsInode) {
435                 rc = cifs_open_inode_helper(inode, file, pCifsInode,
436                                             pCifsFile, tcon,
437                                             &oplock, buf, full_path, xid);
438         } else {
439                 write_unlock(&GlobalSMBSeslock);
440         }
441
442         if (oplock & CIFS_CREATE_ACTION) {
443                 /* time to set mode which we can not set earlier due to
444                    problems creating new read-only files */
445                 if (tcon->unix_ext) {
446                         struct cifs_unix_set_info_args args = {
447                                 .mode   = inode->i_mode,
448                                 .uid    = NO_CHANGE_64,
449                                 .gid    = NO_CHANGE_64,
450                                 .ctime  = NO_CHANGE_64,
451                                 .atime  = NO_CHANGE_64,
452                                 .mtime  = NO_CHANGE_64,
453                                 .device = 0,
454                         };
455                         CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
456                                                cifs_sb->local_nls,
457                                                cifs_sb->mnt_cifs_flags &
458                                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
459                 }
460         }
461
462 out:
463         kfree(buf);
464         kfree(full_path);
465         FreeXid(xid);
466         return rc;
467 }
468
/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 *
 * Not implemented yet: nothing is relocked and 0 is always returned.
 * BB list all locks open on this file and relock
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	return 0;
}
479
480 static int cifs_reopen_file(struct file *file, bool can_flush)
481 {
482         int rc = -EACCES;
483         int xid;
484         __u32 oplock;
485         struct cifs_sb_info *cifs_sb;
486         struct cifsTconInfo *tcon;
487         struct cifsFileInfo *pCifsFile;
488         struct cifsInodeInfo *pCifsInode;
489         struct inode *inode;
490         char *full_path = NULL;
491         int desiredAccess;
492         int disposition = FILE_OPEN;
493         __u16 netfid;
494
495         if (file->private_data)
496                 pCifsFile = (struct cifsFileInfo *)file->private_data;
497         else
498                 return -EBADF;
499
500         xid = GetXid();
501         mutex_lock(&pCifsFile->fh_mutex);
502         if (!pCifsFile->invalidHandle) {
503                 mutex_unlock(&pCifsFile->fh_mutex);
504                 rc = 0;
505                 FreeXid(xid);
506                 return rc;
507         }
508
509         if (file->f_path.dentry == NULL) {
510                 cERROR(1, ("no valid name if dentry freed"));
511                 dump_stack();
512                 rc = -EBADF;
513                 goto reopen_error_exit;
514         }
515
516         inode = file->f_path.dentry->d_inode;
517         if (inode == NULL) {
518                 cERROR(1, ("inode not valid"));
519                 dump_stack();
520                 rc = -EBADF;
521                 goto reopen_error_exit;
522         }
523
524         cifs_sb = CIFS_SB(inode->i_sb);
525         tcon = cifs_sb->tcon;
526
527 /* can not grab rename sem here because various ops, including
528    those that already have the rename sem can end up causing writepage
529    to get called and if the server was down that means we end up here,
530    and we can never tell if the caller already has the rename_sem */
531         full_path = build_path_from_dentry(file->f_path.dentry);
532         if (full_path == NULL) {
533                 rc = -ENOMEM;
534 reopen_error_exit:
535                 mutex_unlock(&pCifsFile->fh_mutex);
536                 FreeXid(xid);
537                 return rc;
538         }
539
540         cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
541                  inode, file->f_flags, full_path));
542
543         if (oplockEnabled)
544                 oplock = REQ_OPLOCK;
545         else
546                 oplock = 0;
547
548         if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
549             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
550                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
551                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
552                 /* can not refresh inode info since size could be stale */
553                 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
554                                      cifs_sb->mnt_file_mode /* ignored */,
555                                      oflags, &oplock, &netfid, xid);
556                 if (rc == 0) {
557                         cFYI(1, ("posix reopen succeeded"));
558                         goto reopen_success;
559                 }
560                 /* fallthrough to retry open the old way on errors, especially
561                    in the reconnect path it is important to retry hard */
562         }
563
564         desiredAccess = cifs_convert_flags(file->f_flags);
565
566         /* Can not refresh inode by passing in file_info buf to be returned
567            by SMBOpen and then calling get_inode_info with returned buf
568            since file might have write behind data that needs to be flushed
569            and server version of file size can be stale. If we knew for sure
570            that inode was not dirty locally we could do this */
571
572         rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
573                          CREATE_NOT_DIR, &netfid, &oplock, NULL,
574                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
575                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
576         if (rc) {
577                 mutex_unlock(&pCifsFile->fh_mutex);
578                 cFYI(1, ("cifs_open returned 0x%x", rc));
579                 cFYI(1, ("oplock: %d", oplock));
580         } else {
581 reopen_success:
582                 pCifsFile->netfid = netfid;
583                 pCifsFile->invalidHandle = false;
584                 mutex_unlock(&pCifsFile->fh_mutex);
585                 pCifsInode = CIFS_I(inode);
586                 if (pCifsInode) {
587                         if (can_flush) {
588                                 rc = filemap_write_and_wait(inode->i_mapping);
589                                 if (rc != 0)
590                                         CIFS_I(inode)->write_behind_rc = rc;
591                         /* temporarily disable caching while we
592                            go to server to get inode info */
593                                 pCifsInode->clientCanCacheAll = false;
594                                 pCifsInode->clientCanCacheRead = false;
595                                 if (tcon->unix_ext)
596                                         rc = cifs_get_inode_info_unix(&inode,
597                                                 full_path, inode->i_sb, xid);
598                                 else
599                                         rc = cifs_get_inode_info(&inode,
600                                                 full_path, NULL, inode->i_sb,
601                                                 xid, NULL);
602                         } /* else we are writing out data to server already
603                              and could deadlock if we tried to flush data, and
604                              since we do not know if we have data that would
605                              invalidate the current end of file on the server
606                              we can not go to the server to get the new inod
607                              info */
608                         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
609                                 pCifsInode->clientCanCacheAll = true;
610                                 pCifsInode->clientCanCacheRead = true;
611                                 cFYI(1, ("Exclusive Oplock granted on inode %p",
612                                          file->f_path.dentry->d_inode));
613                         } else if ((oplock & 0xF) == OPLOCK_READ) {
614                                 pCifsInode->clientCanCacheRead = true;
615                                 pCifsInode->clientCanCacheAll = false;
616                         } else {
617                                 pCifsInode->clientCanCacheRead = false;
618                                 pCifsInode->clientCanCacheAll = false;
619                         }
620                         cifs_relock_file(pCifsFile);
621                 }
622         }
623         kfree(full_path);
624         FreeXid(xid);
625         return rc;
626 }
627
628 int cifs_close(struct inode *inode, struct file *file)
629 {
630         int rc = 0;
631         int xid, timeout;
632         struct cifs_sb_info *cifs_sb;
633         struct cifsTconInfo *pTcon;
634         struct cifsFileInfo *pSMBFile =
635                 (struct cifsFileInfo *)file->private_data;
636
637         xid = GetXid();
638
639         cifs_sb = CIFS_SB(inode->i_sb);
640         pTcon = cifs_sb->tcon;
641         if (pSMBFile) {
642                 struct cifsLockInfo *li, *tmp;
643                 write_lock(&GlobalSMBSeslock);
644                 pSMBFile->closePend = true;
645                 if (pTcon) {
646                         /* no sense reconnecting to close a file that is
647                            already closed */
648                         if (!pTcon->need_reconnect) {
649                                 write_unlock(&GlobalSMBSeslock);
650                                 timeout = 2;
651                                 while ((atomic_read(&pSMBFile->count) != 1)
652                                         && (timeout <= 2048)) {
653                                         /* Give write a better chance to get to
654                                         server ahead of the close.  We do not
655                                         want to add a wait_q here as it would
656                                         increase the memory utilization as
657                                         the struct would be in each open file,
658                                         but this should give enough time to
659                                         clear the socket */
660                                         cFYI(DBG2,
661                                                 ("close delay, write pending"));
662                                         msleep(timeout);
663                                         timeout *= 4;
664                                 }
665                                 if (!pTcon->need_reconnect &&
666                                     !pSMBFile->invalidHandle)
667                                         rc = CIFSSMBClose(xid, pTcon,
668                                                   pSMBFile->netfid);
669                         } else
670                                 write_unlock(&GlobalSMBSeslock);
671                 } else
672                         write_unlock(&GlobalSMBSeslock);
673
674                 /* Delete any outstanding lock records.
675                    We'll lose them when the file is closed anyway. */
676                 mutex_lock(&pSMBFile->lock_mutex);
677                 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
678                         list_del(&li->llist);
679                         kfree(li);
680                 }
681                 mutex_unlock(&pSMBFile->lock_mutex);
682
683                 write_lock(&GlobalSMBSeslock);
684                 list_del(&pSMBFile->flist);
685                 list_del(&pSMBFile->tlist);
686                 write_unlock(&GlobalSMBSeslock);
687                 cifsFileInfo_put(file->private_data);
688                 file->private_data = NULL;
689         } else
690                 rc = -EBADF;
691
692         read_lock(&GlobalSMBSeslock);
693         if (list_empty(&(CIFS_I(inode)->openFileList))) {
694                 cFYI(1, ("closing last open instance for inode %p", inode));
695                 /* if the file is not open we do not know if we can cache info
696                    on this inode, much less write behind and read ahead */
697                 CIFS_I(inode)->clientCanCacheRead = false;
698                 CIFS_I(inode)->clientCanCacheAll  = false;
699         }
700         read_unlock(&GlobalSMBSeslock);
701         if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
702                 rc = CIFS_I(inode)->write_behind_rc;
703         FreeXid(xid);
704         return rc;
705 }
706
707 int cifs_closedir(struct inode *inode, struct file *file)
708 {
709         int rc = 0;
710         int xid;
711         struct cifsFileInfo *pCFileStruct =
712             (struct cifsFileInfo *)file->private_data;
713         char *ptmp;
714
715         cFYI(1, ("Closedir inode = 0x%p", inode));
716
717         xid = GetXid();
718
719         if (pCFileStruct) {
720                 struct cifsTconInfo *pTcon;
721                 struct cifs_sb_info *cifs_sb =
722                         CIFS_SB(file->f_path.dentry->d_sb);
723
724                 pTcon = cifs_sb->tcon;
725
726                 cFYI(1, ("Freeing private data in close dir"));
727                 write_lock(&GlobalSMBSeslock);
728                 if (!pCFileStruct->srch_inf.endOfSearch &&
729                     !pCFileStruct->invalidHandle) {
730                         pCFileStruct->invalidHandle = true;
731                         write_unlock(&GlobalSMBSeslock);
732                         rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
733                         cFYI(1, ("Closing uncompleted readdir with rc %d",
734                                  rc));
735                         /* not much we can do if it fails anyway, ignore rc */
736                         rc = 0;
737                 } else
738                         write_unlock(&GlobalSMBSeslock);
739                 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
740                 if (ptmp) {
741                         cFYI(1, ("closedir free smb buf in srch struct"));
742                         pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
743                         if (pCFileStruct->srch_inf.smallBuf)
744                                 cifs_small_buf_release(ptmp);
745                         else
746                                 cifs_buf_release(ptmp);
747                 }
748                 kfree(file->private_data);
749                 file->private_data = NULL;
750         }
751         /* BB can we lock the filestruct while this is going on? */
752         FreeXid(xid);
753         return rc;
754 }
755
756 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
757                                 __u64 offset, __u8 lockType)
758 {
759         struct cifsLockInfo *li =
760                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
761         if (li == NULL)
762                 return -ENOMEM;
763         li->offset = offset;
764         li->length = len;
765         li->type = lockType;
766         mutex_lock(&fid->lock_mutex);
767         list_add(&li->llist, &fid->llist);
768         mutex_unlock(&fid->lock_mutex);
769         return 0;
770 }
771
772 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
773 {
774         int rc, xid;
775         __u32 numLock = 0;
776         __u32 numUnlock = 0;
777         __u64 length;
778         bool wait_flag = false;
779         struct cifs_sb_info *cifs_sb;
780         struct cifsTconInfo *tcon;
781         __u16 netfid;
782         __u8 lockType = LOCKING_ANDX_LARGE_FILES;
783         bool posix_locking = 0;
784
785         length = 1 + pfLock->fl_end - pfLock->fl_start;
786         rc = -EACCES;
787         xid = GetXid();
788
789         cFYI(1, ("Lock parm: 0x%x flockflags: "
790                  "0x%x flocktype: 0x%x start: %lld end: %lld",
791                 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
792                 pfLock->fl_end));
793
794         if (pfLock->fl_flags & FL_POSIX)
795                 cFYI(1, ("Posix"));
796         if (pfLock->fl_flags & FL_FLOCK)
797                 cFYI(1, ("Flock"));
798         if (pfLock->fl_flags & FL_SLEEP) {
799                 cFYI(1, ("Blocking lock"));
800                 wait_flag = true;
801         }
802         if (pfLock->fl_flags & FL_ACCESS)
803                 cFYI(1, ("Process suspended by mandatory locking - "
804                          "not implemented yet"));
805         if (pfLock->fl_flags & FL_LEASE)
806                 cFYI(1, ("Lease on file - not implemented yet"));
807         if (pfLock->fl_flags &
808             (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
809                 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
810
811         if (pfLock->fl_type == F_WRLCK) {
812                 cFYI(1, ("F_WRLCK "));
813                 numLock = 1;
814         } else if (pfLock->fl_type == F_UNLCK) {
815                 cFYI(1, ("F_UNLCK"));
816                 numUnlock = 1;
817                 /* Check if unlock includes more than
818                 one lock range */
819         } else if (pfLock->fl_type == F_RDLCK) {
820                 cFYI(1, ("F_RDLCK"));
821                 lockType |= LOCKING_ANDX_SHARED_LOCK;
822                 numLock = 1;
823         } else if (pfLock->fl_type == F_EXLCK) {
824                 cFYI(1, ("F_EXLCK"));
825                 numLock = 1;
826         } else if (pfLock->fl_type == F_SHLCK) {
827                 cFYI(1, ("F_SHLCK"));
828                 lockType |= LOCKING_ANDX_SHARED_LOCK;
829                 numLock = 1;
830         } else
831                 cFYI(1, ("Unknown type of lock"));
832
833         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
834         tcon = cifs_sb->tcon;
835
836         if (file->private_data == NULL) {
837                 rc = -EBADF;
838                 FreeXid(xid);
839                 return rc;
840         }
841         netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
842
843         if ((tcon->ses->capabilities & CAP_UNIX) &&
844             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
845             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
846                 posix_locking = 1;
847         /* BB add code here to normalize offset and length to
848         account for negative length which we can not accept over the
849         wire */
850         if (IS_GETLK(cmd)) {
851                 if (posix_locking) {
852                         int posix_lock_type;
853                         if (lockType & LOCKING_ANDX_SHARED_LOCK)
854                                 posix_lock_type = CIFS_RDLCK;
855                         else
856                                 posix_lock_type = CIFS_WRLCK;
857                         rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
858                                         length, pfLock,
859                                         posix_lock_type, wait_flag);
860                         FreeXid(xid);
861                         return rc;
862                 }
863
864                 /* BB we could chain these into one lock request BB */
865                 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
866                                  0, 1, lockType, 0 /* wait flag */ );
867                 if (rc == 0) {
868                         rc = CIFSSMBLock(xid, tcon, netfid, length,
869                                          pfLock->fl_start, 1 /* numUnlock */ ,
870                                          0 /* numLock */ , lockType,
871                                          0 /* wait flag */ );
872                         pfLock->fl_type = F_UNLCK;
873                         if (rc != 0)
874                                 cERROR(1, ("Error unlocking previously locked "
875                                            "range %d during test of lock", rc));
876                         rc = 0;
877
878                 } else {
879                         /* if rc == ERR_SHARING_VIOLATION ? */
880                         rc = 0; /* do not change lock type to unlock
881                                    since range in use */
882                 }
883
884                 FreeXid(xid);
885                 return rc;
886         }
887
888         if (!numLock && !numUnlock) {
889                 /* if no lock or unlock then nothing
890                 to do since we do not know what it is */
891                 FreeXid(xid);
892                 return -EOPNOTSUPP;
893         }
894
895         if (posix_locking) {
896                 int posix_lock_type;
897                 if (lockType & LOCKING_ANDX_SHARED_LOCK)
898                         posix_lock_type = CIFS_RDLCK;
899                 else
900                         posix_lock_type = CIFS_WRLCK;
901
902                 if (numUnlock == 1)
903                         posix_lock_type = CIFS_UNLCK;
904
905                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
906                                       length, pfLock,
907                                       posix_lock_type, wait_flag);
908         } else {
909                 struct cifsFileInfo *fid =
910                         (struct cifsFileInfo *)file->private_data;
911
912                 if (numLock) {
913                         rc = CIFSSMBLock(xid, tcon, netfid, length,
914                                         pfLock->fl_start,
915                                         0, numLock, lockType, wait_flag);
916
917                         if (rc == 0) {
918                                 /* For Windows locks we must store them. */
919                                 rc = store_file_lock(fid, length,
920                                                 pfLock->fl_start, lockType);
921                         }
922                 } else if (numUnlock) {
923                         /* For each stored lock that this unlock overlaps
924                            completely, unlock it. */
925                         int stored_rc = 0;
926                         struct cifsLockInfo *li, *tmp;
927
928                         rc = 0;
929                         mutex_lock(&fid->lock_mutex);
930                         list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
931                                 if (pfLock->fl_start <= li->offset &&
932                                                 (pfLock->fl_start + length) >=
933                                                 (li->offset + li->length)) {
934                                         stored_rc = CIFSSMBLock(xid, tcon,
935                                                         netfid,
936                                                         li->length, li->offset,
937                                                         1, 0, li->type, false);
938                                         if (stored_rc)
939                                                 rc = stored_rc;
940
941                                         list_del(&li->llist);
942                                         kfree(li);
943                                 }
944                         }
945                         mutex_unlock(&fid->lock_mutex);
946                 }
947         }
948
949         if (pfLock->fl_flags & FL_POSIX)
950                 posix_lock_file_wait(file, pfLock);
951         FreeXid(xid);
952         return rc;
953 }
954
955 /*
956  * Set the timeout on write requests past EOF. For some servers (Windows)
957  * these calls can be very long.
958  *
959  * If we're writing >10M past the EOF we give a 180s timeout. Anything less
960  * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
961  * The 10M cutoff is totally arbitrary. A better scheme for this would be
962  * welcome if someone wants to suggest one.
963  *
964  * We may be able to do a better job with this if there were some way to
965  * declare that a file should be sparse.
966  */
967 static int
968 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
969 {
970         if (offset <= cifsi->server_eof)
971                 return CIFS_STD_OP;
972         else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
973                 return CIFS_VLONG_OP;
974         else
975                 return CIFS_LONG_OP;
976 }
977
978 /* update the file size (if needed) after a write */
979 static void
980 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
981                       unsigned int bytes_written)
982 {
983         loff_t end_of_write = offset + bytes_written;
984
985         if (end_of_write > cifsi->server_eof)
986                 cifsi->server_eof = end_of_write;
987 }
988
989 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
990         size_t write_size, loff_t *poffset)
991 {
992         int rc = 0;
993         unsigned int bytes_written = 0;
994         unsigned int total_written;
995         struct cifs_sb_info *cifs_sb;
996         struct cifsTconInfo *pTcon;
997         int xid, long_op;
998         struct cifsFileInfo *open_file;
999         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1000
1001         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1002
1003         pTcon = cifs_sb->tcon;
1004
1005         /* cFYI(1,
1006            (" write %d bytes to offset %lld of %s", write_size,
1007            *poffset, file->f_path.dentry->d_name.name)); */
1008
1009         if (file->private_data == NULL)
1010                 return -EBADF;
1011         open_file = (struct cifsFileInfo *) file->private_data;
1012
1013         rc = generic_write_checks(file, poffset, &write_size, 0);
1014         if (rc)
1015                 return rc;
1016
1017         xid = GetXid();
1018
1019         long_op = cifs_write_timeout(cifsi, *poffset);
1020         for (total_written = 0; write_size > total_written;
1021              total_written += bytes_written) {
1022                 rc = -EAGAIN;
1023                 while (rc == -EAGAIN) {
1024                         if (file->private_data == NULL) {
1025                                 /* file has been closed on us */
1026                                 FreeXid(xid);
1027                         /* if we have gotten here we have written some data
1028                            and blocked, and the file has been freed on us while
1029                            we blocked so return what we managed to write */
1030                                 return total_written;
1031                         }
1032                         if (open_file->closePend) {
1033                                 FreeXid(xid);
1034                                 if (total_written)
1035                                         return total_written;
1036                                 else
1037                                         return -EBADF;
1038                         }
1039                         if (open_file->invalidHandle) {
1040                                 /* we could deadlock if we called
1041                                    filemap_fdatawait from here so tell
1042                                    reopen_file not to flush data to server
1043                                    now */
1044                                 rc = cifs_reopen_file(file, false);
1045                                 if (rc != 0)
1046                                         break;
1047                         }
1048
1049                         rc = CIFSSMBWrite(xid, pTcon,
1050                                 open_file->netfid,
1051                                 min_t(const int, cifs_sb->wsize,
1052                                       write_size - total_written),
1053                                 *poffset, &bytes_written,
1054                                 NULL, write_data + total_written, long_op);
1055                 }
1056                 if (rc || (bytes_written == 0)) {
1057                         if (total_written)
1058                                 break;
1059                         else {
1060                                 FreeXid(xid);
1061                                 return rc;
1062                         }
1063                 } else {
1064                         cifs_update_eof(cifsi, *poffset, bytes_written);
1065                         *poffset += bytes_written;
1066                 }
1067                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1068                                     15 seconds is plenty */
1069         }
1070
1071         cifs_stats_bytes_written(pTcon, total_written);
1072
1073         /* since the write may have blocked check these pointers again */
1074         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1075                 struct inode *inode = file->f_path.dentry->d_inode;
1076 /* Do not update local mtime - server will set its actual value on write
1077  *              inode->i_ctime = inode->i_mtime =
1078  *                      current_fs_time(inode->i_sb);*/
1079                 if (total_written > 0) {
1080                         spin_lock(&inode->i_lock);
1081                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1082                                 i_size_write(file->f_path.dentry->d_inode,
1083                                         *poffset);
1084                         spin_unlock(&inode->i_lock);
1085                 }
1086                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1087         }
1088         FreeXid(xid);
1089         return total_written;
1090 }
1091
1092 static ssize_t cifs_write(struct file *file, const char *write_data,
1093                           size_t write_size, loff_t *poffset)
1094 {
1095         int rc = 0;
1096         unsigned int bytes_written = 0;
1097         unsigned int total_written;
1098         struct cifs_sb_info *cifs_sb;
1099         struct cifsTconInfo *pTcon;
1100         int xid, long_op;
1101         struct cifsFileInfo *open_file;
1102         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1103
1104         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1105
1106         pTcon = cifs_sb->tcon;
1107
1108         cFYI(1, ("write %zd bytes to offset %lld of %s", write_size,
1109            *poffset, file->f_path.dentry->d_name.name));
1110
1111         if (file->private_data == NULL)
1112                 return -EBADF;
1113         open_file = (struct cifsFileInfo *)file->private_data;
1114
1115         xid = GetXid();
1116
1117         long_op = cifs_write_timeout(cifsi, *poffset);
1118         for (total_written = 0; write_size > total_written;
1119              total_written += bytes_written) {
1120                 rc = -EAGAIN;
1121                 while (rc == -EAGAIN) {
1122                         if (file->private_data == NULL) {
1123                                 /* file has been closed on us */
1124                                 FreeXid(xid);
1125                         /* if we have gotten here we have written some data
1126                            and blocked, and the file has been freed on us
1127                            while we blocked so return what we managed to
1128                            write */
1129                                 return total_written;
1130                         }
1131                         if (open_file->closePend) {
1132                                 FreeXid(xid);
1133                                 if (total_written)
1134                                         return total_written;
1135                                 else
1136                                         return -EBADF;
1137                         }
1138                         if (open_file->invalidHandle) {
1139                                 /* we could deadlock if we called
1140                                    filemap_fdatawait from here so tell
1141                                    reopen_file not to flush data to
1142                                    server now */
1143                                 rc = cifs_reopen_file(file, false);
1144                                 if (rc != 0)
1145                                         break;
1146                         }
1147                         if (experimEnabled || (pTcon->ses->server &&
1148                                 ((pTcon->ses->server->secMode &
1149                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1150                                 == 0))) {
1151                                 struct kvec iov[2];
1152                                 unsigned int len;
1153
1154                                 len = min((size_t)cifs_sb->wsize,
1155                                           write_size - total_written);
1156                                 /* iov[0] is reserved for smb header */
1157                                 iov[1].iov_base = (char *)write_data +
1158                                                   total_written;
1159                                 iov[1].iov_len = len;
1160                                 rc = CIFSSMBWrite2(xid, pTcon,
1161                                                 open_file->netfid, len,
1162                                                 *poffset, &bytes_written,
1163                                                 iov, 1, long_op);
1164                         } else
1165                                 rc = CIFSSMBWrite(xid, pTcon,
1166                                          open_file->netfid,
1167                                          min_t(const int, cifs_sb->wsize,
1168                                                write_size - total_written),
1169                                          *poffset, &bytes_written,
1170                                          write_data + total_written,
1171                                          NULL, long_op);
1172                 }
1173                 if (rc || (bytes_written == 0)) {
1174                         if (total_written)
1175                                 break;
1176                         else {
1177                                 FreeXid(xid);
1178                                 return rc;
1179                         }
1180                 } else {
1181                         cifs_update_eof(cifsi, *poffset, bytes_written);
1182                         *poffset += bytes_written;
1183                 }
1184                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1185                                     15 seconds is plenty */
1186         }
1187
1188         cifs_stats_bytes_written(pTcon, total_written);
1189
1190         /* since the write may have blocked check these pointers again */
1191         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1192 /*BB We could make this contingent on superblock ATIME flag too */
1193 /*              file->f_path.dentry->d_inode->i_ctime =
1194                 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1195                 if (total_written > 0) {
1196                         spin_lock(&file->f_path.dentry->d_inode->i_lock);
1197                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1198                                 i_size_write(file->f_path.dentry->d_inode,
1199                                              *poffset);
1200                         spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1201                 }
1202                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1203         }
1204         FreeXid(xid);
1205         return total_written;
1206 }
1207
#ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * Find an open handle on @cifs_inode that can be used for reading.
 *
 * Walks the inode's open file list under GlobalSMBSeslock, skipping entries
 * with a close pending or an invalid handle.  The walk stops at the first
 * entry that is not open for reading, relying on write-only entries being
 * kept at the end of the list.
 *
 * On success the handle is returned with an extra reference taken via
 * cifsFileInfo_get(), so it cannot be freed under the caller; the caller
 * is expected to drop it again.  Returns NULL if no usable handle exists.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file = NULL;
	unsigned int accmode;

	read_lock(&GlobalSMBSeslock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (open_file->closePend)
			continue;

		if (!open_file->pfile)
			break;

		/*
		 * O_RDONLY is 0, so it cannot be tested as a bit
		 * ("f_flags & O_RDONLY" is always false); compare the
		 * access-mode field instead.
		 */
		accmode = open_file->pfile->f_flags & O_ACCMODE;
		if (accmode != O_RDONLY && accmode != O_RDWR)
			break; /* write only files are last so must be done */

		if (!open_file->invalidHandle) {
			/* found a good file */
			/* lock it so it will not be closed on us */
			cifsFileInfo_get(open_file);
			read_unlock(&GlobalSMBSeslock);
			return open_file;
		} /* else might as well continue, and look for
		     another, or simply have the caller reopen it
		     again rather than trying to fix this handle */
	}
	read_unlock(&GlobalSMBSeslock);
	return NULL;
}
#endif
1238
1239 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1240 {
1241         struct cifsFileInfo *open_file;
1242         bool any_available = false;
1243         int rc;
1244
1245         /* Having a null inode here (because mapping->host was set to zero by
1246         the VFS or MM) should not happen but we had reports of on oops (due to
1247         it being zero) during stress testcases so we need to check for it */
1248
1249         if (cifs_inode == NULL) {
1250                 cERROR(1, ("Null inode passed to cifs_writeable_file"));
1251                 dump_stack();
1252                 return NULL;
1253         }
1254
1255         read_lock(&GlobalSMBSeslock);
1256 refind_writable:
1257         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1258                 if (open_file->closePend ||
1259                     (!any_available && open_file->pid != current->tgid))
1260                         continue;
1261
1262                 if (open_file->pfile &&
1263                     ((open_file->pfile->f_flags & O_RDWR) ||
1264                      (open_file->pfile->f_flags & O_WRONLY))) {
1265                         cifsFileInfo_get(open_file);
1266
1267                         if (!open_file->invalidHandle) {
1268                                 /* found a good writable file */
1269                                 read_unlock(&GlobalSMBSeslock);
1270                                 return open_file;
1271                         }
1272
1273                         read_unlock(&GlobalSMBSeslock);
1274                         /* Had to unlock since following call can block */
1275                         rc = cifs_reopen_file(open_file->pfile, false);
1276                         if (!rc) {
1277                                 if (!open_file->closePend)
1278                                         return open_file;
1279                                 else { /* start over in case this was deleted */
1280                                        /* since the list could be modified */
1281                                         read_lock(&GlobalSMBSeslock);
1282                                         cifsFileInfo_put(open_file);
1283                                         goto refind_writable;
1284                                 }
1285                         }
1286
1287                         /* if it fails, try another handle if possible -
1288                         (we can not do this if closePending since
1289                         loop could be modified - in which case we
1290                         have to start at the beginning of the list
1291                         again. Note that it would be bad
1292                         to hold up writepages here (rather than
1293                         in caller) with continuous retries */
1294                         cFYI(1, ("wp failed on reopen file"));
1295                         read_lock(&GlobalSMBSeslock);
1296                         /* can not use this handle, no write
1297                            pending on this one after all */
1298                         cifsFileInfo_put(open_file);
1299
1300                         if (open_file->closePend) /* list could have changed */
1301                                 goto refind_writable;
1302                         /* else we simply continue to the next entry. Thus
1303                            we do not loop on reopen errors.  If we
1304                            can not reopen the file, for example if we
1305                            reconnected to a server with another client
1306                            racing to delete or lock the file we would not
1307                            make progress if we restarted before the beginning
1308                            of the loop here. */
1309                 }
1310         }
1311         /* couldn't find useable FH with same pid, try any available */
1312         if (!any_available) {
1313                 any_available = true;
1314                 goto refind_writable;
1315         }
1316         read_unlock(&GlobalSMBSeslock);
1317         return NULL;
1318 }
1319
1320 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1321 {
1322         struct address_space *mapping = page->mapping;
1323         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1324         char *write_data;
1325         int rc = -EFAULT;
1326         int bytes_written = 0;
1327         struct cifs_sb_info *cifs_sb;
1328         struct cifsTconInfo *pTcon;
1329         struct inode *inode;
1330         struct cifsFileInfo *open_file;
1331
1332         if (!mapping || !mapping->host)
1333                 return -EFAULT;
1334
1335         inode = page->mapping->host;
1336         cifs_sb = CIFS_SB(inode->i_sb);
1337         pTcon = cifs_sb->tcon;
1338
1339         offset += (loff_t)from;
1340         write_data = kmap(page);
1341         write_data += from;
1342
1343         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1344                 kunmap(page);
1345                 return -EIO;
1346         }
1347
1348         /* racing with truncate? */
1349         if (offset > mapping->host->i_size) {
1350                 kunmap(page);
1351                 return 0; /* don't care */
1352         }
1353
1354         /* check to make sure that we are not extending the file */
1355         if (mapping->host->i_size - offset < (loff_t)to)
1356                 to = (unsigned)(mapping->host->i_size - offset);
1357
1358         open_file = find_writable_file(CIFS_I(mapping->host));
1359         if (open_file) {
1360                 bytes_written = cifs_write(open_file->pfile, write_data,
1361                                            to-from, &offset);
1362                 cifsFileInfo_put(open_file);
1363                 /* Does mm or vfs already set times? */
1364                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1365                 if ((bytes_written > 0) && (offset))
1366                         rc = 0;
1367                 else if (bytes_written < 0)
1368                         rc = bytes_written;
1369         } else {
1370                 cFYI(1, ("No writeable filehandles for inode"));
1371                 rc = -EIO;
1372         }
1373
1374         kunmap(page);
1375         return rc;
1376 }
1377
/*
 * cifs_writepages - write back dirty pages of @mapping in multi-page runs.
 *
 * Collects runs of consecutive dirty pages into a kvec array (slot 0 is
 * left free for the SMB header) and sends each run with a single
 * CIFSSMBWrite2() call.  Falls back to generic_writepages() when wsize is
 * smaller than a page, when signing is required or enabled and
 * experimEnabled is not set, or when the kvec array cannot be allocated.
 *
 * Returns 0 without writing anything if the caller is non-blocking and the
 * backing device is congested.  Otherwise returns the rc left by the last
 * run that was attempted (0 if no run was attempted); write failures are
 * also recorded in mapping->flags as AS_ENOSPC or AS_EIO.
 */
1378 static int cifs_writepages(struct address_space *mapping,
1379                            struct writeback_control *wbc)
1380 {
1381         struct backing_dev_info *bdi = mapping->backing_dev_info;
1382         unsigned int bytes_to_write;
1383         unsigned int bytes_written;
1384         struct cifs_sb_info *cifs_sb;
1385         int done = 0;
1386         pgoff_t end;
1387         pgoff_t index;
1388         int range_whole = 0;
1389         struct kvec *iov;
1390         int len;
1391         int n_iov = 0;
1392         pgoff_t next;
1393         int nr_pages;
1394         __u64 offset = 0;
1395         struct cifsFileInfo *open_file;
1396         struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1397         struct page *page;
1398         struct pagevec pvec;
1399         int rc = 0;
1400         int scanned = 0;
1401         int xid, long_op;
1402
1403         cifs_sb = CIFS_SB(mapping->host->i_sb);
1404
1405         /*
1406          * If wsize is smaller than the page cache size, default to writing
1407          * one page at a time via cifs_writepage
1408          */
1409         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1410                 return generic_writepages(mapping, wbc);
1411
             /*
              * With signing required or enabled, take the multi-page path
              * only when experimEnabled is set.
              */
1412         if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1413                 if (cifs_sb->tcon->ses->server->secMode &
1414                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1415                         if (!experimEnabled)
1416                                 return generic_writepages(mapping, wbc);
1417
             /*
              * 32 kvec slots; iov[0] is reserved for the SMB header and data
              * pages start at iov[1].  If the allocation fails, fall back to
              * the generic path.
              */
1418         iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1419         if (iov == NULL)
1420                 return generic_writepages(mapping, wbc);
1421
1422
1423         /*
1424          * BB: Is this meaningful for a non-block-device file system?
1425          * If it is, we should test it again after we do I/O
1426          */
1427         if (wbc->nonblocking && bdi_write_congested(bdi)) {
1428                 wbc->encountered_congestion = 1;
1429                 kfree(iov);
1430                 return 0;
1431         }
1432
1433         xid = GetXid();
1434
1435         pagevec_init(&pvec, 0);
1436         if (wbc->range_cyclic) {
1437                 index = mapping->writeback_index; /* Start from prev offset */
1438                 end = -1;
1439         } else {
1440                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1441                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1442                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1443                         range_whole = 1;
1444                 scanned = 1;
1445         }
1446 retry:
             /*
              * Each pass looks up a batch of dirty pages starting at index
              * and tries to send one run of consecutive pages from it.
              */
1447         while (!done && (index <= end) &&
1448                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1449                         PAGECACHE_TAG_DIRTY,
1450                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1451                 int first;
1452                 unsigned int i;
1453
1454                 first = -1;
1455                 next = 0;
1456                 n_iov = 0;
1457                 bytes_to_write = 0;
1458
1459                 for (i = 0; i < nr_pages; i++) {
1460                         page = pvec.pages[i];
1461                         /*
1462                          * At this point we hold neither mapping->tree_lock nor
1463                          * lock on the page itself: the page may be truncated or
1464                          * invalidated (changing page->mapping to NULL), or even
1465                          * swizzled back from swapper_space to tmpfs file
1466                          * mapping
1467                          */
1468
                             /*
                              * Block only for the first page of a run; if a
                              * later page cannot be locked at once, end the
                              * run there.
                              */
1469                         if (first < 0)
1470                                 lock_page(page);
1471                         else if (!trylock_page(page))
1472                                 break;
1473
1474                         if (unlikely(page->mapping != mapping)) {
1475                                 unlock_page(page);
1476                                 break;
1477                         }
1478
1479                         if (!wbc->range_cyclic && page->index > end) {
1480                                 done = 1;
1481                                 unlock_page(page);
1482                                 break;
1483                         }
1484
1485                         if (next && (page->index != next)) {
1486                                 /* Not next consecutive page */
1487                                 unlock_page(page);
1488                                 break;
1489                         }
1490
1491                         if (wbc->sync_mode != WB_SYNC_NONE)
1492                                 wait_on_page_writeback(page);
1493
1494                         if (PageWriteback(page) ||
1495                                         !clear_page_dirty_for_io(page)) {
1496                                 unlock_page(page);
1497                                 break;
1498                         }
1499
1500                         /*
1501                          * This actually clears the dirty bit in the radix tree.
1502                          * See cifs_writepage() for more commentary.
1503                          */
1504                         set_page_writeback(page);
1505
                             /* Page lies at or beyond i_size: nothing to send. */
1506                         if (page_offset(page) >= mapping->host->i_size) {
1507                                 done = 1;
1508                                 unlock_page(page);
1509                                 end_page_writeback(page);
1510                                 break;
1511                         }
1512
1513                         /*
1514                          * BB can we get rid of this?  pages are held by pvec
1515                          */
1516                         page_cache_get(page);
1517
                             /* Send only the part of the page below i_size. */
1518                         len = min(mapping->host->i_size - page_offset(page),
1519                                   (loff_t)PAGE_CACHE_SIZE);
1520
1521                         /* reserve iov[0] for the smb header */
1522                         n_iov++;
1523                         iov[n_iov].iov_base = kmap(page);
1524                         iov[n_iov].iov_len = len;
1525                         bytes_to_write += len;
1526
1527                         if (first < 0) {
1528                                 first = i;
1529                                 offset = page_offset(page);
1530                         }
1531                         next = page->index + 1;
                             /* Stop once another full page would exceed wsize. */
1532                         if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1533                                 break;
1534                 }
1535                 if (n_iov) {
1536                         /* Search for a writable handle every time we call
1537                          * CIFSSMBWrite2.  We can't rely on the last handle
1538                          * we used to still be valid
1539                          */
1540                         open_file = find_writable_file(CIFS_I(mapping->host));
1541                         if (!open_file) {
1542                                 cERROR(1, ("No writable handles for inode"));
1543                                 rc = -EBADF;
1544                         } else {
1545                                 long_op = cifs_write_timeout(cifsi, offset);
1546                                 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1547                                                    open_file->netfid,
1548                                                    bytes_to_write, offset,
1549                                                    &bytes_written, iov, n_iov,
1550                                                    long_op);
1551                                 cifsFileInfo_put(open_file);
1552                                 cifs_update_eof(cifsi, offset, bytes_written);
1553
1554                                 if (rc || bytes_written < bytes_to_write) {
1555                                         cERROR(1, ("Write2 ret %d, wrote %d",
1556                                                   rc, bytes_written));
1557                                         /* BB what if continued retry is
1558                                            requested via mount flags? */
1559                                         if (rc == -ENOSPC)
1560                                                 set_bit(AS_ENOSPC, &mapping->flags);
1561                                         else
1562                                                 set_bit(AS_EIO, &mapping->flags);
1563                                 } else {
1564                                         cifs_stats_bytes_written(cifs_sb->tcon,
1565                                                                  bytes_written);
1566                                 }
1567                         }
                             /*
                              * Finish writeback on every page of the run,
                              * undoing the kmap and the extra reference taken
                              * while the run was being built.
                              */
1568                         for (i = 0; i < n_iov; i++) {
1569                                 page = pvec.pages[first + i];
1570                                 /* Should we also set page error on
1571                                 success rc but too little data written? */
1572                                 /* BB investigate retry logic on temporary
1573                                 server crash cases and how recovery works
1574                                 when page marked as error */
1575                                 if (rc)
1576                                         SetPageError(page);
1577                                 kunmap(page);
1578                                 unlock_page(page);
1579                                 end_page_writeback(page);
1580                                 page_cache_release(page);
1581                         }
                             /* Charge the pages of this run to the caller's budget. */
1582                         if ((wbc->nr_to_write -= n_iov) <= 0)
1583                                 done = 1;
1584                         index = next;
1585                 } else
1586                         /* Need to re-find the pages we skipped */
1587                         index = pvec.pages[0]->index + 1;
1588
1589                 pagevec_release(&pvec);
1590         }
1591         if (!scanned && !done) {
1592                 /*
1593                  * We hit the last page and there is more work to be done: wrap
1594                  * back to the start of the file
1595                  */
1596                 scanned = 1;
1597                 index = 0;
1598                 goto retry;
1599         }
             /* Record where the next cyclic (or unfinished whole-file) pass resumes. */
1600         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1601                 mapping->writeback_index = index;
1602
1603         FreeXid(xid);
1604         kfree(iov);
1605         return rc;
1606 }
1607
1608 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1609 {
1610         int rc = -EFAULT;
1611         int xid;
1612
1613         xid = GetXid();
1614 /* BB add check for wbc flags */
1615         page_cache_get(page);
1616         if (!PageUptodate(page))
1617                 cFYI(1, ("ppw - page not up to date"));
1618
1619         /*
1620          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1621          *
1622          * A writepage() implementation always needs to do either this,
1623          * or re-dirty the page with "redirty_page_for_writepage()" in
1624          * the case of a failure.
1625          *
1626          * Just unlocking the page will cause the radix tree tag-bits
1627          * to fail to update with the state of the page correctly.
1628          */
1629         set_page_writeback(page);
1630         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1631         SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1632         unlock_page(page);
1633         end_page_writeback(page);
1634         page_cache_release(page);
1635         FreeXid(xid);
1636         return rc;
1637 }
1638
1639 static int cifs_write_end(struct file *file, struct address_space *mapping,
1640                         loff_t pos, unsigned len, unsigned copied,
1641                         struct page *page, void *fsdata)
1642 {
1643         int rc;
1644         struct inode *inode = mapping->host;
1645
1646         cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1647                  page, pos, copied));
1648
1649         if (PageChecked(page)) {
1650                 if (copied == len)
1651                         SetPageUptodate(page);
1652                 ClearPageChecked(page);
1653         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1654                 SetPageUptodate(page);
1655
1656         if (!PageUptodate(page)) {
1657                 char *page_data;
1658                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1659                 int xid;
1660
1661                 xid = GetXid();
1662                 /* this is probably better than directly calling
1663                    partialpage_write since in this function the file handle is
1664                    known which we might as well leverage */
1665                 /* BB check if anything else missing out of ppw
1666                    such as updating last write time */
1667                 page_data = kmap(page);
1668                 rc = cifs_write(file, page_data + offset, copied, &pos);
1669                 /* if (rc < 0) should we set writebehind rc? */
1670                 kunmap(page);
1671
1672                 FreeXid(xid);
1673         } else {
1674                 rc = copied;
1675                 pos += copied;
1676                 set_page_dirty(page);
1677         }
1678
1679         if (rc > 0) {
1680                 spin_lock(&inode->i_lock);
1681                 if (pos > inode->i_size)
1682                         i_size_write(inode, pos);
1683                 spin_unlock(&inode->i_lock);
1684         }
1685
1686         unlock_page(page);
1687         page_cache_release(page);
1688
1689         return rc;
1690 }
1691
1692 int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1693 {
1694         int xid;
1695         int rc = 0;
1696         struct cifsTconInfo *tcon;
1697         struct cifsFileInfo *smbfile =
1698                 (struct cifsFileInfo *)file->private_data;
1699         struct inode *inode = file->f_path.dentry->d_inode;
1700
1701         xid = GetXid();
1702
1703         cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1704                 dentry->d_name.name, datasync));
1705
1706         rc = filemap_write_and_wait(inode->i_mapping);
1707         if (rc == 0) {
1708                 rc = CIFS_I(inode)->write_behind_rc;
1709                 CIFS_I(inode)->write_behind_rc = 0;
1710                 tcon = CIFS_SB(inode->i_sb)->tcon;
1711                 if (!rc && tcon && smbfile &&
1712                    !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1713                         rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1714         }
1715
1716         FreeXid(xid);
1717         return rc;
1718 }
1719
1720 /* static void cifs_sync_page(struct page *page)
1721 {
1722         struct address_space *mapping;
1723         struct inode *inode;
1724         unsigned long index = page->index;
1725         unsigned int rpages = 0;
1726         int rc = 0;
1727
1728         cFYI(1, ("sync page %p",page));
1729         mapping = page->mapping;
1730         if (!mapping)
1731                 return 0;
1732         inode = mapping->host;
1733         if (!inode)
1734                 return; */
1735
1736 /*      fill in rpages then
1737         result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1738
1739 /*      cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1740
1741 #if 0
1742         if (rc < 0)
1743                 return rc;
1744         return 0;
1745 #endif
1746 } */
1747
1748 /*
1749  * As file closes, flush all cached write data for this inode checking
1750  * for write behind errors.
1751  */
1752 int cifs_flush(struct file *file, fl_owner_t id)
1753 {
1754         struct inode *inode = file->f_path.dentry->d_inode;
1755         int rc = 0;
1756
1757         /* Rather than do the steps manually:
1758            lock the inode for writing
1759            loop through pages looking for write behind data (dirty pages)
1760            coalesce into contiguous 16K (or smaller) chunks to write to server
1761            send to server (prefer in parallel)
1762            deal with writebehind errors
1763            unlock inode for writing
1764            filemapfdatawrite appears easier for the time being */
1765
1766         rc = filemap_fdatawrite(inode->i_mapping);
1767         /* reset wb rc if we were able to write out dirty pages */
1768         if (!rc) {
1769                 rc = CIFS_I(inode)->write_behind_rc;
1770                 CIFS_I(inode)->write_behind_rc = 0;
1771         }
1772
1773         cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
1774
1775         return rc;
1776 }
1777
1778 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1779         size_t read_size, loff_t *poffset)
1780 {
1781         int rc = -EACCES;
1782         unsigned int bytes_read = 0;
1783         unsigned int total_read = 0;
1784         unsigned int current_read_size;
1785         struct cifs_sb_info *cifs_sb;
1786         struct cifsTconInfo *pTcon;
1787         int xid;
1788         struct cifsFileInfo *open_file;
1789         char *smb_read_data;
1790         char __user *current_offset;
1791         struct smb_com_read_rsp *pSMBr;
1792
1793         xid = GetXid();
1794         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1795         pTcon = cifs_sb->tcon;
1796
1797         if (file->private_data == NULL) {
1798                 rc = -EBADF;
1799                 FreeXid(xid);
1800                 return rc;
1801         }
1802         open_file = (struct cifsFileInfo *)file->private_data;
1803
1804         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1805                 cFYI(1, ("attempting read on write only file instance"));
1806
1807         for (total_read = 0, current_offset = read_data;
1808              read_size > total_read;
1809              total_read += bytes_read, current_offset += bytes_read) {
1810                 current_read_size = min_t(const int, read_size - total_read,
1811                                           cifs_sb->rsize);
1812                 rc = -EAGAIN;
1813                 smb_read_data = NULL;
1814                 while (rc == -EAGAIN) {
1815                         int buf_type = CIFS_NO_BUFFER;
1816                         if ((open_file->invalidHandle) &&
1817                             (!open_file->closePend)) {
1818                                 rc = cifs_reopen_file(file, true);
1819                                 if (rc != 0)
1820                                         break;
1821                         }
1822                         rc = CIFSSMBRead(xid, pTcon,
1823                                          open_file->netfid,
1824                                          current_read_size, *poffset,
1825                                          &bytes_read, &smb_read_data,
1826                                          &buf_type);
1827                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1828                         if (smb_read_data) {
1829                                 if (copy_to_user(current_offset,
1830                                                 smb_read_data +
1831                                                 4 /* RFC1001 length field */ +
1832                                                 le16_to_cpu(pSMBr->DataOffset),
1833                                                 bytes_read))
1834                                         rc = -EFAULT;
1835
1836                                 if (buf_type == CIFS_SMALL_BUFFER)
1837                                         cifs_small_buf_release(smb_read_data);
1838                                 else if (buf_type == CIFS_LARGE_BUFFER)
1839                                         cifs_buf_release(smb_read_data);
1840                                 smb_read_data = NULL;
1841                         }
1842                 }
1843                 if (rc || (bytes_read == 0)) {
1844                         if (total_read) {
1845                                 break;
1846                         } else {
1847                                 FreeXid(xid);
1848                                 return rc;
1849                         }
1850                 } else {
1851                         cifs_stats_bytes_read(pTcon, bytes_read);
1852                         *poffset += bytes_read;
1853                 }
1854         }
1855         FreeXid(xid);
1856         return total_read;
1857 }
1858
1859
1860 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1861         loff_t *poffset)
1862 {
1863         int rc = -EACCES;
1864         unsigned int bytes_read = 0;
1865         unsigned int total_read;
1866         unsigned int current_read_size;
1867         struct cifs_sb_info *cifs_sb;
1868         struct cifsTconInfo *pTcon;
1869         int xid;
1870         char *current_offset;
1871         struct cifsFileInfo *open_file;
1872         int buf_type = CIFS_NO_BUFFER;
1873
1874         xid = GetXid();
1875         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1876         pTcon = cifs_sb->tcon;
1877
1878         if (file->private_data == NULL) {
1879                 rc = -EBADF;
1880                 FreeXid(xid);
1881                 return rc;
1882         }
1883         open_file = (struct cifsFileInfo *)file->private_data;
1884
1885         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1886                 cFYI(1, ("attempting read on write only file instance"));
1887
1888         for (total_read = 0, current_offset = read_data;
1889              read_size > total_read;
1890              total_read += bytes_read, current_offset += bytes_read) {
1891                 current_read_size = min_t(const int, read_size - total_read,
1892                                           cifs_sb->rsize);
1893                 /* For windows me and 9x we do not want to request more
1894                 than it negotiated since it will refuse the read then */
1895                 if ((pTcon->ses) &&
1896                         !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1897                         current_read_size = min_t(const int, current_read_size,
1898                                         pTcon->ses->server->maxBuf - 128);
1899                 }
1900                 rc = -EAGAIN;
1901                 while (rc == -EAGAIN) {
1902                         if ((open_file->invalidHandle) &&
1903                             (!open_file->closePend)) {
1904                                 rc = cifs_reopen_file(file, true);
1905                                 if (rc != 0)
1906                                         break;
1907                         }
1908                         rc = CIFSSMBRead(xid, pTcon,
1909                                          open_file->netfid,
1910                                          current_read_size, *poffset,
1911                                          &bytes_read, &current_offset,
1912                                          &buf_type);
1913                 }
1914                 if (rc || (bytes_read == 0)) {
1915                         if (total_read) {
1916                                 break;
1917                         } else {
1918                                 FreeXid(xid);
1919                                 return rc;
1920                         }
1921                 } else {
1922                         cifs_stats_bytes_read(pTcon, total_read);
1923                         *poffset += bytes_read;
1924                 }
1925         }
1926         FreeXid(xid);
1927         return total_read;
1928 }
1929
1930 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1931 {
1932         struct dentry *dentry = file->f_path.dentry;
1933         int rc, xid;
1934
1935         xid = GetXid();
1936         rc = cifs_revalidate(dentry);
1937         if (rc) {
1938                 cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
1939                 FreeXid(xid);
1940                 return rc;
1941         }
1942         rc = generic_file_mmap(file, vma);
1943         FreeXid(xid);
1944         return rc;
1945 }
1946
1947
1948 static void cifs_copy_cache_pages(struct address_space *mapping,
1949         struct list_head *pages, int bytes_read, char *data,
1950         struct pagevec *plru_pvec)
1951 {
1952         struct page *page;
1953         char *target;
1954
1955         while (bytes_read > 0) {
1956                 if (list_empty(pages))
1957                         break;
1958
1959                 page = list_entry(pages->prev, struct page, lru);
1960                 list_del(&page->lru);
1961
1962                 if (add_to_page_cache(page, mapping, page->index,
1963                                       GFP_KERNEL)) {
1964                         page_cache_release(page);
1965                         cFYI(1, ("Add page cache failed"));
1966                         data += PAGE_CACHE_SIZE;
1967                         bytes_read -= PAGE_CACHE_SIZE;
1968                         continue;
1969                 }
1970
1971                 target = kmap_atomic(page, KM_USER0);
1972
1973                 if (PAGE_CACHE_SIZE > bytes_read) {
1974                         memcpy(target, data, bytes_read);
1975                         /* zero the tail end of this partial page */
1976                         memset(target + bytes_read, 0,
1977                                PAGE_CACHE_SIZE - bytes_read);
1978                         bytes_read = 0;
1979                 } else {
1980                         memcpy(target, data, PAGE_CACHE_SIZE);
1981                         bytes_read -= PAGE_CACHE_SIZE;
1982                 }
1983                 kunmap_atomic(target, KM_USER0);
1984
1985                 flush_dcache_page(page);
1986                 SetPageUptodate(page);
1987                 unlock_page(page);
1988                 if (!pagevec_add(plru_pvec, page))
1989                         __pagevec_lru_add_file(plru_pvec);
1990                 data += PAGE_CACHE_SIZE;
1991         }
1992         return;
1993 }
1994
/*
 * cifs_readpages - fill the pages queued on @page_list with data read from
 * the server.
 *
 * Repeatedly looks at the page at the tail of @page_list, counts how many
 * pages walking backwards from it have consecutive indexes, and issues one
 * CIFSSMBRead() for that run, capped at the page-aligned part of rsize.
 * The response payload is copied into the page cache by
 * cifs_copy_cache_pages(), which also removes the pages from the list.
 *
 * Returns -EBADF if @file has no private data; otherwise the rc of the
 * last read attempt (the initial -EACCES if no read was attempted).
 */
1995 static int cifs_readpages(struct file *file, struct address_space *mapping,
1996         struct list_head *page_list, unsigned num_pages)
1997 {
1998         int rc = -EACCES;
1999         int xid;
2000         loff_t offset;
2001         struct page *page;
2002         struct cifs_sb_info *cifs_sb;
2003         struct cifsTconInfo *pTcon;
2004         unsigned int bytes_read = 0;
2005         unsigned int read_size, i;
2006         char *smb_read_data = NULL;
2007         struct smb_com_read_rsp *pSMBr;
2008         struct pagevec lru_pvec;
2009         struct cifsFileInfo *open_file;
2010         int buf_type = CIFS_NO_BUFFER;
2011
2012         xid = GetXid();
2013         if (file->private_data == NULL) {
2014                 rc = -EBADF;
2015                 FreeXid(xid);
2016                 return rc;
2017         }
2018         open_file = (struct cifsFileInfo *)file->private_data;
2019         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2020         pTcon = cifs_sb->tcon;
2021
2022         pagevec_init(&lru_pvec, 0);
2023         cFYI(DBG2, ("rpages: num pages %d", num_pages));
             /* i counts pages handled so far; it is advanced after each read. */
2024         for (i = 0; i < num_pages; ) {
2025                 unsigned contig_pages;
2026                 struct page *tmp_page;
2027                 unsigned long expected_index;
2028
2029                 if (list_empty(page_list))
2030                         break;
2031
                     /* The next page to fill is the one at the tail of the list. */
2032                 page = list_entry(page_list->prev, struct page, lru);
2033                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2034
2035                 /* count adjacent pages that we will read into */
2036                 contig_pages = 0;
2037                 expected_index =
2038                         list_entry(page_list->prev, struct page, lru)->index;
2039                 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2040                         if (tmp_page->index == expected_index) {
2041                                 contig_pages++;
2042                                 expected_index++;
2043                         } else
2044                                 break;
2045                 }
                     /* Do not go past the number of pages the caller asked for. */
2046                 if (contig_pages + i >  num_pages)
2047                         contig_pages = num_pages - i;
2048
2049                 /* for reads over a certain size could initiate async
2050                    read ahead */
2051
2052                 read_size = contig_pages * PAGE_CACHE_SIZE;
2053                 /* Read size needs to be in multiples of one page */
2054                 read_size = min_t(const unsigned int, read_size,
2055                                   cifs_sb->rsize & PAGE_CACHE_MASK);
2056                 cFYI(DBG2, ("rpages: read size 0x%x  contiguous pages %d",
2057                                 read_size, contig_pages));
                     /*
                      * Retry while the read returns -EAGAIN, reopening an
                      * invalidated handle first; any response buffer is
                      * released before the next attempt.
                      */
2058                 rc = -EAGAIN;
2059                 while (rc == -EAGAIN) {
2060                         if ((open_file->invalidHandle) &&
2061                             (!open_file->closePend)) {
2062                                 rc = cifs_reopen_file(file, true);
2063                                 if (rc != 0)
2064                                         break;
2065                         }
2066
2067                         rc = CIFSSMBRead(xid, pTcon,
2068                                          open_file->netfid,
2069                                          read_size, offset,
2070                                          &bytes_read, &smb_read_data,
2071                                          &buf_type);
2072                         /* BB more RC checks ? */
2073                         if (rc == -EAGAIN) {
2074                                 if (smb_read_data) {
2075                                         if (buf_type == CIFS_SMALL_BUFFER)
2076                                                 cifs_small_buf_release(smb_read_data);
2077                                         else if (buf_type == CIFS_LARGE_BUFFER)
2078                                                 cifs_buf_release(smb_read_data);
2079                                         smb_read_data = NULL;
2080                                 }
2081                         }
2082                 }
2083                 if ((rc < 0) || (smb_read_data == NULL)) {
2084                         cFYI(1, ("Read error in readpages: %d", rc));
2085                         break;
2086                 } else if (bytes_read > 0) {
2087                         task_io_account_read(bytes_read);
2088                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
                             /*
                              * Payload starts DataOffset bytes past the 4-byte
                              * RFC1001 header of the response.
                              */
2089                         cifs_copy_cache_pages(mapping, page_list, bytes_read,
2090                                 smb_read_data + 4 /* RFC1001 hdr */ +
2091                                 le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
2092
                             /* Advance by the number of whole pages received. */
2093                         i +=  bytes_read >> PAGE_CACHE_SHIFT;
2094                         cifs_stats_bytes_read(pTcon, bytes_read);
2095                         if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2096                                 i++; /* account for partial page */
2097
2098                                 /* server copy of file can have smaller size
2099                                    than client */
2100                                 /* BB do we need to verify this common case ?
2101                                    this case is ok - if we are at server EOF
2102                                    we will hit it on next read */
2103
2104                                 /* break; */
2105                         }
2106                 } else {
2107                         cFYI(1, ("No bytes read (%d) at offset %lld . "
2108                                  "Cleaning remaining pages from readahead list",
2109                                  bytes_read, offset));
2110                         /* BB turn off caching and do new lookup on
2111                            file size at server? */
2112                         break;
2113                 }
                     /* Done with this response; release it before the next read. */
2114                 if (smb_read_data) {
2115                         if (buf_type == CIFS_SMALL_BUFFER)
2116                                 cifs_small_buf_release(smb_read_data);
2117                         else if (buf_type == CIFS_LARGE_BUFFER)
2118                                 cifs_buf_release(smb_read_data);
2119                         smb_read_data = NULL;
2120                 }
2121                 bytes_read = 0;
2122         }
2123
2124         pagevec_lru_add_file(&lru_pvec);
2125
2126 /* need to free smb_read_data buf before exit */
2127         if (smb_read_data) {
2128                 if (buf_type == CIFS_SMALL_BUFFER)
2129                         cifs_small_buf_release(smb_read_data);
2130                 else if (buf_type == CIFS_LARGE_BUFFER)
2131                         cifs_buf_release(smb_read_data);
2132                 smb_read_data = NULL;
2133         }
2134
2135         FreeXid(xid);
2136         return rc;
2137 }
2138
2139 static int cifs_readpage_worker(struct file *file, struct page *page,
2140         loff_t *poffset)
2141 {
2142         char *read_data;
2143         int rc;
2144
2145         page_cache_get(page);
2146         read_data = kmap(page);
2147         /* for reads over a certain size could initiate async read ahead */
2148
2149         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2150
2151         if (rc < 0)
2152                 goto io_error;
2153         else
2154                 cFYI(1, ("Bytes read %d", rc));
2155
2156         file->f_path.dentry->d_inode->i_atime =
2157                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2158
2159         if (PAGE_CACHE_SIZE > rc)
2160                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2161
2162         flush_dcache_page(page);
2163         SetPageUptodate(page);
2164         rc = 0;
2165
2166 io_error:
2167         kunmap(page);
2168         page_cache_release(page);
2169         return rc;
2170 }
2171
2172 static int cifs_readpage(struct file *file, struct page *page)
2173 {
2174         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2175         int rc = -EACCES;
2176         int xid;
2177
2178         xid = GetXid();
2179
2180         if (file->private_data == NULL) {
2181                 rc = -EBADF;
2182                 FreeXid(xid);
2183                 return rc;
2184         }
2185
2186         cFYI(1, ("readpage %p at offset %d 0x%x\n",
2187                  page, (int)offset, (int)offset));
2188
2189         rc = cifs_readpage_worker(file, page, &offset);
2190
2191         unlock_page(page);
2192
2193         FreeXid(xid);
2194         return rc;
2195 }
2196
2197 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2198 {
2199         struct cifsFileInfo *open_file;
2200
2201         read_lock(&GlobalSMBSeslock);
2202         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2203                 if (open_file->closePend)
2204                         continue;
2205                 if (open_file->pfile &&
2206                     ((open_file->pfile->f_flags & O_RDWR) ||
2207                      (open_file->pfile->f_flags & O_WRONLY))) {
2208                         read_unlock(&GlobalSMBSeslock);
2209                         return 1;
2210                 }
2211         }
2212         read_unlock(&GlobalSMBSeslock);
2213         return 0;
2214 }
2215
2216 /* We do not want to update the file size from server for inodes
2217    open for write - to avoid races with writepage extending
2218    the file - in the future we could consider allowing
2219    refreshing the inode only on increases in the file size
2220    but this is tricky to do without racing with writebehind
2221    page caching in the current Linux kernel design */
2222 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2223 {
2224         if (!cifsInode)
2225                 return true;
2226
2227         if (is_inode_writable(cifsInode)) {
2228                 /* This inode is open for write at least once */
2229                 struct cifs_sb_info *cifs_sb;
2230
2231                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2232                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2233                         /* since no page cache to corrupt on directio
2234                         we can change size safely */
2235                         return true;
2236                 }
2237
2238                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2239                         return true;
2240
2241                 return false;
2242         } else
2243                 return true;
2244 }
2245
2246 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2247                         loff_t pos, unsigned len, unsigned flags,
2248                         struct page **pagep, void **fsdata)
2249 {
2250         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2251         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2252         loff_t page_start = pos & PAGE_MASK;
2253         loff_t i_size;
2254         struct page *page;
2255         int rc = 0;
2256
2257         cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2258
2259         page = grab_cache_page_write_begin(mapping, index, flags);
2260         if (!page) {
2261                 rc = -ENOMEM;
2262                 goto out;
2263         }
2264
2265         if (PageUptodate(page))
2266                 goto out;
2267
2268         /*
2269          * If we write a full page it will be up to date, no need to read from
2270          * the server. If the write is short, we'll end up doing a sync write
2271          * instead.
2272          */
2273         if (len == PAGE_CACHE_SIZE)
2274                 goto out;
2275
2276         /*
2277          * optimize away the read when we have an oplock, and we're not
2278          * expecting to use any of the data we'd be reading in. That
2279          * is, when the page lies beyond the EOF, or straddles the EOF
2280          * and the write will cover all of the existing data.
2281          */
2282         if (CIFS_I(mapping->host)->clientCanCacheRead) {
2283                 i_size = i_size_read(mapping->host);
2284                 if (page_start >= i_size ||
2285                     (offset == 0 && (pos + len) >= i_size)) {
2286                         zero_user_segments(page, 0, offset,
2287                                            offset + len,
2288                                            PAGE_CACHE_SIZE);
2289                         /*
2290                          * PageChecked means that the parts of the page
2291                          * to which we're not writing are considered up
2292                          * to date. Once the data is copied to the
2293                          * page, it can be set uptodate.
2294                          */
2295                         SetPageChecked(page);
2296                         goto out;
2297                 }
2298         }
2299
2300         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2301                 /*
2302                  * might as well read a page, it is fast enough. If we get
2303                  * an error, we don't need to return it. cifs_write_end will
2304                  * do a sync write instead since PG_uptodate isn't set.
2305                  */
2306                 cifs_readpage_worker(file, page, &page_start);
2307         } else {
2308                 /* we could try using another file handle if there is one -
2309                    but how would we lock it to prevent close of that handle
2310                    racing with this read? In any case
2311                    this will be written out by write_end so is fine */
2312         }
2313 out:
2314         *pagep = page;
2315         return rc;
2316 }
2317
2318 static void
2319 cifs_oplock_break(struct slow_work *work)
2320 {
2321         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2322                                                   oplock_break);
2323         struct inode *inode = cfile->pInode;
2324         struct cifsInodeInfo *cinode = CIFS_I(inode);
2325         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb);
2326         int rc, waitrc = 0;
2327
2328         if (inode && S_ISREG(inode->i_mode)) {
2329 #ifdef CONFIG_CIFS_EXPERIMENTAL
2330                 if (cinode->clientCanCacheAll == 0)
2331                         break_lease(inode, FMODE_READ);
2332                 else if (cinode->clientCanCacheRead == 0)
2333                         break_lease(inode, FMODE_WRITE);
2334 #endif
2335                 rc = filemap_fdatawrite(inode->i_mapping);
2336                 if (cinode->clientCanCacheRead == 0) {
2337                         waitrc = filemap_fdatawait(inode->i_mapping);
2338                         invalidate_remote_inode(inode);
2339                 }
2340                 if (!rc)
2341                         rc = waitrc;
2342                 if (rc)
2343                         cinode->write_behind_rc = rc;
2344                 cFYI(1, ("Oplock flush inode %p rc %d", inode, rc));
2345         }
2346
2347         /*
2348          * releasing stale oplock after recent reconnect of smb session using
2349          * a now incorrect file handle is not a data integrity issue but do
2350          * not bother sending an oplock release if session to server still is
2351          * disconnected since oplock already released by the server
2352          */
2353         if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2354                 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2355                                  LOCKING_ANDX_OPLOCK_RELEASE, false);
2356                 cFYI(1, ("Oplock release rc = %d", rc));
2357         }
2358 }
2359
2360 static int
2361 cifs_oplock_break_get(struct slow_work *work)
2362 {
2363         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2364                                                   oplock_break);
2365         mntget(cfile->mnt);
2366         cifsFileInfo_get(cfile);
2367         return 0;
2368 }
2369
2370 static void
2371 cifs_oplock_break_put(struct slow_work *work)
2372 {
2373         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2374                                                   oplock_break);
2375         mntput(cfile->mnt);
2376         cifsFileInfo_put(cfile);
2377 }
2378
2379 const struct slow_work_ops cifs_oplock_break_ops = {
2380         .get_ref        = cifs_oplock_break_get,
2381         .put_ref        = cifs_oplock_break_put,
2382         .execute        = cifs_oplock_break,
2383 };
2384
2385 const struct address_space_operations cifs_addr_ops = {
2386         .readpage = cifs_readpage,
2387         .readpages = cifs_readpages,
2388         .writepage = cifs_writepage,
2389         .writepages = cifs_writepages,
2390         .write_begin = cifs_write_begin,
2391         .write_end = cifs_write_end,
2392         .set_page_dirty = __set_page_dirty_nobuffers,
2393         /* .sync_page = cifs_sync_page, */
2394         /* .direct_IO = */
2395 };
2396
2397 /*
2398  * cifs_readpages requires the server to support a buffer large enough to
2399  * contain the header plus one complete page of data.  Otherwise, we need
2400  * to leave cifs_readpages out of the address space operations.
2401  */
2402 const struct address_space_operations cifs_addr_ops_smallbuf = {
2403         .readpage = cifs_readpage,
2404         .writepage = cifs_writepage,
2405         .writepages = cifs_writepages,
2406         .write_begin = cifs_write_begin,
2407         .write_end = cifs_write_end,
2408         .set_page_dirty = __set_page_dirty_nobuffers,
2409         /* .sync_page = cifs_sync_page, */
2410         /* .direct_IO = */
2411 };