cifs: fix oplock request handling in posix codepath
[safe/jmp/linux-2.6] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2007
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <asm/div64.h>
34 #include "cifsfs.h"
35 #include "cifspdu.h"
36 #include "cifsglob.h"
37 #include "cifsproto.h"
38 #include "cifs_unicode.h"
39 #include "cifs_debug.h"
40 #include "cifs_fs_sb.h"
41
42 static inline struct cifsFileInfo *cifs_init_private(
43         struct cifsFileInfo *private_data, struct inode *inode,
44         struct file *file, __u16 netfid)
45 {
46         memset(private_data, 0, sizeof(struct cifsFileInfo));
47         private_data->netfid = netfid;
48         private_data->pid = current->tgid;
49         mutex_init(&private_data->fh_mutex);
50         mutex_init(&private_data->lock_mutex);
51         INIT_LIST_HEAD(&private_data->llist);
52         private_data->pfile = file; /* needed for writepage */
53         private_data->pInode = inode;
54         private_data->invalidHandle = false;
55         private_data->closePend = false;
56         /* Initialize reference count to one.  The private data is
57         freed on the release of the last reference */
58         atomic_set(&private_data->count, 1);
59
60         return private_data;
61 }
62
63 static inline int cifs_convert_flags(unsigned int flags)
64 {
65         if ((flags & O_ACCMODE) == O_RDONLY)
66                 return GENERIC_READ;
67         else if ((flags & O_ACCMODE) == O_WRONLY)
68                 return GENERIC_WRITE;
69         else if ((flags & O_ACCMODE) == O_RDWR) {
70                 /* GENERIC_ALL is too much permission to request
71                    can cause unnecessary access denied on create */
72                 /* return GENERIC_ALL; */
73                 return (GENERIC_READ | GENERIC_WRITE);
74         }
75
76         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
77                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
78                 FILE_READ_DATA);
79 }
80
81 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
82 {
83         fmode_t posix_flags = 0;
84
85         if ((flags & O_ACCMODE) == O_RDONLY)
86                 posix_flags = FMODE_READ;
87         else if ((flags & O_ACCMODE) == O_WRONLY)
88                 posix_flags = FMODE_WRITE;
89         else if ((flags & O_ACCMODE) == O_RDWR) {
90                 /* GENERIC_ALL is too much permission to request
91                    can cause unnecessary access denied on create */
92                 /* return GENERIC_ALL; */
93                 posix_flags = FMODE_READ | FMODE_WRITE;
94         }
95         /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
96            reopening a file.  They had their effect on the original open */
97         if (flags & O_APPEND)
98                 posix_flags |= (fmode_t)O_APPEND;
99         if (flags & O_SYNC)
100                 posix_flags |= (fmode_t)O_SYNC;
101         if (flags & O_DIRECTORY)
102                 posix_flags |= (fmode_t)O_DIRECTORY;
103         if (flags & O_NOFOLLOW)
104                 posix_flags |= (fmode_t)O_NOFOLLOW;
105         if (flags & O_DIRECT)
106                 posix_flags |= (fmode_t)O_DIRECT;
107
108         return posix_flags;
109 }
110
111 static inline int cifs_get_disposition(unsigned int flags)
112 {
113         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
114                 return FILE_CREATE;
115         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
116                 return FILE_OVERWRITE_IF;
117         else if ((flags & O_CREAT) == O_CREAT)
118                 return FILE_OPEN_IF;
119         else if ((flags & O_TRUNC) == O_TRUNC)
120                 return FILE_OVERWRITE;
121         else
122                 return FILE_OPEN;
123 }
124
125 /* all arguments to this function must be checked for validity in caller */
126 static inline int
127 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
128                              struct cifsInodeInfo *pCifsInode,
129                              struct cifsFileInfo *pCifsFile, __u32 oplock,
130                              u16 netfid)
131 {
132
133         write_lock(&GlobalSMBSeslock);
134
135         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
136         if (pCifsInode == NULL) {
137                 write_unlock(&GlobalSMBSeslock);
138                 return -EINVAL;
139         }
140
141         if (pCifsInode->clientCanCacheRead) {
142                 /* we have the inode open somewhere else
143                    no need to discard cache data */
144                 goto psx_client_can_cache;
145         }
146
147         /* BB FIXME need to fix this check to move it earlier into posix_open
148            BB  fIX following section BB FIXME */
149
150         /* if not oplocked, invalidate inode pages if mtime or file
151            size changed */
152 /*      temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
153         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
154                            (file->f_path.dentry->d_inode->i_size ==
155                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
156                 cFYI(1, ("inode unchanged on server"));
157         } else {
158                 if (file->f_path.dentry->d_inode->i_mapping) {
159                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
160                         if (rc != 0)
161                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
162                 }
163                 cFYI(1, ("invalidating remote inode since open detected it "
164                          "changed"));
165                 invalidate_remote_inode(file->f_path.dentry->d_inode);
166         } */
167
168 psx_client_can_cache:
169         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
170                 pCifsInode->clientCanCacheAll = true;
171                 pCifsInode->clientCanCacheRead = true;
172                 cFYI(1, ("Exclusive Oplock granted on inode %p",
173                          file->f_path.dentry->d_inode));
174         } else if ((oplock & 0xF) == OPLOCK_READ)
175                 pCifsInode->clientCanCacheRead = true;
176
177         /* will have to change the unlock if we reenable the
178            filemap_fdatawrite (which does not seem necessary */
179         write_unlock(&GlobalSMBSeslock);
180         return 0;
181 }
182
183 static struct cifsFileInfo *
184 cifs_fill_filedata(struct file *file)
185 {
186         struct list_head *tmp;
187         struct cifsFileInfo *pCifsFile = NULL;
188         struct cifsInodeInfo *pCifsInode = NULL;
189
190         /* search inode for this file and fill in file->private_data */
191         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
192         read_lock(&GlobalSMBSeslock);
193         list_for_each(tmp, &pCifsInode->openFileList) {
194                 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
195                 if ((pCifsFile->pfile == NULL) &&
196                     (pCifsFile->pid == current->tgid)) {
197                         /* mode set in cifs_create */
198
199                         /* needed for writepage */
200                         pCifsFile->pfile = file;
201                         file->private_data = pCifsFile;
202                         break;
203                 }
204         }
205         read_unlock(&GlobalSMBSeslock);
206
207         if (file->private_data != NULL) {
208                 return pCifsFile;
209         } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
210                         cERROR(1, ("could not find file instance for "
211                                    "new file %p", file));
212         return NULL;
213 }
214
215 /* all arguments to this function must be checked for validity in caller */
216 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
217         struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
218         struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
219         char *full_path, int xid)
220 {
221         struct timespec temp;
222         int rc;
223
224         /* want handles we can use to read with first
225            in the list so we do not have to walk the
226            list to search for one in write_begin */
227         if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
228                 list_add_tail(&pCifsFile->flist,
229                               &pCifsInode->openFileList);
230         } else {
231                 list_add(&pCifsFile->flist,
232                          &pCifsInode->openFileList);
233         }
234         write_unlock(&GlobalSMBSeslock);
235         if (pCifsInode->clientCanCacheRead) {
236                 /* we have the inode open somewhere else
237                    no need to discard cache data */
238                 goto client_can_cache;
239         }
240
241         /* BB need same check in cifs_create too? */
242         /* if not oplocked, invalidate inode pages if mtime or file
243            size changed */
244         temp = cifs_NTtimeToUnix(buf->LastWriteTime);
245         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
246                            (file->f_path.dentry->d_inode->i_size ==
247                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
248                 cFYI(1, ("inode unchanged on server"));
249         } else {
250                 if (file->f_path.dentry->d_inode->i_mapping) {
251                 /* BB no need to lock inode until after invalidate
252                    since namei code should already have it locked? */
253                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
254                         if (rc != 0)
255                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
256                 }
257                 cFYI(1, ("invalidating remote inode since open detected it "
258                          "changed"));
259                 invalidate_remote_inode(file->f_path.dentry->d_inode);
260         }
261
262 client_can_cache:
263         if (pTcon->unix_ext)
264                 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
265                         full_path, inode->i_sb, xid);
266         else
267                 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
268                         full_path, buf, inode->i_sb, xid, NULL);
269
270         if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
271                 pCifsInode->clientCanCacheAll = true;
272                 pCifsInode->clientCanCacheRead = true;
273                 cFYI(1, ("Exclusive Oplock granted on inode %p",
274                          file->f_path.dentry->d_inode));
275         } else if ((*oplock & 0xF) == OPLOCK_READ)
276                 pCifsInode->clientCanCacheRead = true;
277
278         return rc;
279 }
280
281 int cifs_open(struct inode *inode, struct file *file)
282 {
283         int rc = -EACCES;
284         int xid;
285         __u32 oplock;
286         struct cifs_sb_info *cifs_sb;
287         struct cifsTconInfo *tcon;
288         struct cifsFileInfo *pCifsFile;
289         struct cifsInodeInfo *pCifsInode;
290         char *full_path = NULL;
291         int desiredAccess;
292         int disposition;
293         __u16 netfid;
294         FILE_ALL_INFO *buf = NULL;
295
296         xid = GetXid();
297
298         cifs_sb = CIFS_SB(inode->i_sb);
299         tcon = cifs_sb->tcon;
300
301         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
302         pCifsFile = cifs_fill_filedata(file);
303         if (pCifsFile) {
304                 rc = 0;
305                 FreeXid(xid);
306                 return rc;
307         }
308
309         full_path = build_path_from_dentry(file->f_path.dentry);
310         if (full_path == NULL) {
311                 rc = -ENOMEM;
312                 FreeXid(xid);
313                 return rc;
314         }
315
316         cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
317                  inode, file->f_flags, full_path));
318
319         if (oplockEnabled)
320                 oplock = REQ_OPLOCK;
321         else
322                 oplock = 0;
323
324         if (!tcon->broken_posix_open && tcon->unix_ext &&
325             (tcon->ses->capabilities & CAP_UNIX) &&
326             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
327                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
328                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
329                 /* can not refresh inode info since size could be stale */
330                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
331                                      cifs_sb->mnt_file_mode /* ignored */,
332                                      oflags, &oplock, &netfid, xid);
333                 if (rc == 0) {
334                         cFYI(1, ("posix open succeeded"));
335                         /* no need for special case handling of setting mode
336                            on read only files needed here */
337
338                         pCifsFile = cifs_fill_filedata(file);
339                         cifs_posix_open_inode_helper(inode, file, pCifsInode,
340                                                      pCifsFile, oplock, netfid);
341                         goto out;
342                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
343                         if (tcon->ses->serverNOS)
344                                 cERROR(1, ("server %s of type %s returned"
345                                            " unexpected error on SMB posix open"
346                                            ", disabling posix open support."
347                                            " Check if server update available.",
348                                            tcon->ses->serverName,
349                                            tcon->ses->serverNOS));
350                         tcon->broken_posix_open = true;
351                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
352                          (rc != -EOPNOTSUPP)) /* path not found or net err */
353                         goto out;
354                 /* else fallthrough to retry open the old way on network i/o
355                    or DFS errors */
356         }
357
358         desiredAccess = cifs_convert_flags(file->f_flags);
359
360 /*********************************************************************
361  *  open flag mapping table:
362  *
363  *      POSIX Flag            CIFS Disposition
364  *      ----------            ----------------
365  *      O_CREAT               FILE_OPEN_IF
366  *      O_CREAT | O_EXCL      FILE_CREATE
367  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
368  *      O_TRUNC               FILE_OVERWRITE
369  *      none of the above     FILE_OPEN
370  *
371  *      Note that there is not a direct match between disposition
372  *      FILE_SUPERSEDE (ie create whether or not file exists although
373  *      O_CREAT | O_TRUNC is similar but truncates the existing
374  *      file rather than creating a new file as FILE_SUPERSEDE does
375  *      (which uses the attributes / metadata passed in on open call)
376  *?
377  *?  O_SYNC is a reasonable match to CIFS writethrough flag
378  *?  and the read write flags match reasonably.  O_LARGEFILE
379  *?  is irrelevant because largefile support is always used
380  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
381  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
382  *********************************************************************/
383
384         disposition = cifs_get_disposition(file->f_flags);
385
386         /* BB pass O_SYNC flag through on file attributes .. BB */
387
388         /* Also refresh inode by passing in file_info buf returned by SMBOpen
389            and calling get_inode_info with returned buf (at least helps
390            non-Unix server case) */
391
392         /* BB we can not do this if this is the second open of a file
393            and the first handle has writebehind data, we might be
394            able to simply do a filemap_fdatawrite/filemap_fdatawait first */
395         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
396         if (!buf) {
397                 rc = -ENOMEM;
398                 goto out;
399         }
400
401         if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
402                 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
403                          desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
404                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
405                                  & CIFS_MOUNT_MAP_SPECIAL_CHR);
406         else
407                 rc = -EIO; /* no NT SMB support fall into legacy open below */
408
409         if (rc == -EIO) {
410                 /* Old server, try legacy style OpenX */
411                 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
412                         desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
413                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
414                                 & CIFS_MOUNT_MAP_SPECIAL_CHR);
415         }
416         if (rc) {
417                 cFYI(1, ("cifs_open returned 0x%x", rc));
418                 goto out;
419         }
420         file->private_data =
421                 kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
422         if (file->private_data == NULL) {
423                 rc = -ENOMEM;
424                 goto out;
425         }
426         pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
427         write_lock(&GlobalSMBSeslock);
428         list_add(&pCifsFile->tlist, &tcon->openFileList);
429
430         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
431         if (pCifsInode) {
432                 rc = cifs_open_inode_helper(inode, file, pCifsInode,
433                                             pCifsFile, tcon,
434                                             &oplock, buf, full_path, xid);
435         } else {
436                 write_unlock(&GlobalSMBSeslock);
437         }
438
439         if (oplock & CIFS_CREATE_ACTION) {
440                 /* time to set mode which we can not set earlier due to
441                    problems creating new read-only files */
442                 if (tcon->unix_ext) {
443                         struct cifs_unix_set_info_args args = {
444                                 .mode   = inode->i_mode,
445                                 .uid    = NO_CHANGE_64,
446                                 .gid    = NO_CHANGE_64,
447                                 .ctime  = NO_CHANGE_64,
448                                 .atime  = NO_CHANGE_64,
449                                 .mtime  = NO_CHANGE_64,
450                                 .device = 0,
451                         };
452                         CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
453                                                cifs_sb->local_nls,
454                                                cifs_sb->mnt_cifs_flags &
455                                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
456                 }
457         }
458
459 out:
460         kfree(buf);
461         kfree(full_path);
462         FreeXid(xid);
463         return rc;
464 }
465
/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 *
 * Not implemented yet: no locks are re-established and @cifsFile is not
 * touched.  Always returns 0.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	/* BB list all locks open on this file and relock */
	return 0;
}
476
477 static int cifs_reopen_file(struct file *file, bool can_flush)
478 {
479         int rc = -EACCES;
480         int xid;
481         __u32 oplock;
482         struct cifs_sb_info *cifs_sb;
483         struct cifsTconInfo *tcon;
484         struct cifsFileInfo *pCifsFile;
485         struct cifsInodeInfo *pCifsInode;
486         struct inode *inode;
487         char *full_path = NULL;
488         int desiredAccess;
489         int disposition = FILE_OPEN;
490         __u16 netfid;
491
492         if (file->private_data)
493                 pCifsFile = (struct cifsFileInfo *)file->private_data;
494         else
495                 return -EBADF;
496
497         xid = GetXid();
498         mutex_lock(&pCifsFile->fh_mutex);
499         if (!pCifsFile->invalidHandle) {
500                 mutex_unlock(&pCifsFile->fh_mutex);
501                 rc = 0;
502                 FreeXid(xid);
503                 return rc;
504         }
505
506         if (file->f_path.dentry == NULL) {
507                 cERROR(1, ("no valid name if dentry freed"));
508                 dump_stack();
509                 rc = -EBADF;
510                 goto reopen_error_exit;
511         }
512
513         inode = file->f_path.dentry->d_inode;
514         if (inode == NULL) {
515                 cERROR(1, ("inode not valid"));
516                 dump_stack();
517                 rc = -EBADF;
518                 goto reopen_error_exit;
519         }
520
521         cifs_sb = CIFS_SB(inode->i_sb);
522         tcon = cifs_sb->tcon;
523
524 /* can not grab rename sem here because various ops, including
525    those that already have the rename sem can end up causing writepage
526    to get called and if the server was down that means we end up here,
527    and we can never tell if the caller already has the rename_sem */
528         full_path = build_path_from_dentry(file->f_path.dentry);
529         if (full_path == NULL) {
530                 rc = -ENOMEM;
531 reopen_error_exit:
532                 mutex_unlock(&pCifsFile->fh_mutex);
533                 FreeXid(xid);
534                 return rc;
535         }
536
537         cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
538                  inode, file->f_flags, full_path));
539
540         if (oplockEnabled)
541                 oplock = REQ_OPLOCK;
542         else
543                 oplock = 0;
544
545         if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
546             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
547                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
548                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
549                 /* can not refresh inode info since size could be stale */
550                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
551                                      cifs_sb->mnt_file_mode /* ignored */,
552                                      oflags, &oplock, &netfid, xid);
553                 if (rc == 0) {
554                         cFYI(1, ("posix reopen succeeded"));
555                         goto reopen_success;
556                 }
557                 /* fallthrough to retry open the old way on errors, especially
558                    in the reconnect path it is important to retry hard */
559         }
560
561         desiredAccess = cifs_convert_flags(file->f_flags);
562
563         /* Can not refresh inode by passing in file_info buf to be returned
564            by SMBOpen and then calling get_inode_info with returned buf
565            since file might have write behind data that needs to be flushed
566            and server version of file size can be stale. If we knew for sure
567            that inode was not dirty locally we could do this */
568
569         rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
570                          CREATE_NOT_DIR, &netfid, &oplock, NULL,
571                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
572                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
573         if (rc) {
574                 mutex_unlock(&pCifsFile->fh_mutex);
575                 cFYI(1, ("cifs_open returned 0x%x", rc));
576                 cFYI(1, ("oplock: %d", oplock));
577         } else {
578 reopen_success:
579                 pCifsFile->netfid = netfid;
580                 pCifsFile->invalidHandle = false;
581                 mutex_unlock(&pCifsFile->fh_mutex);
582                 pCifsInode = CIFS_I(inode);
583                 if (pCifsInode) {
584                         if (can_flush) {
585                                 rc = filemap_write_and_wait(inode->i_mapping);
586                                 if (rc != 0)
587                                         CIFS_I(inode)->write_behind_rc = rc;
588                         /* temporarily disable caching while we
589                            go to server to get inode info */
590                                 pCifsInode->clientCanCacheAll = false;
591                                 pCifsInode->clientCanCacheRead = false;
592                                 if (tcon->unix_ext)
593                                         rc = cifs_get_inode_info_unix(&inode,
594                                                 full_path, inode->i_sb, xid);
595                                 else
596                                         rc = cifs_get_inode_info(&inode,
597                                                 full_path, NULL, inode->i_sb,
598                                                 xid, NULL);
599                         } /* else we are writing out data to server already
600                              and could deadlock if we tried to flush data, and
601                              since we do not know if we have data that would
602                              invalidate the current end of file on the server
603                              we can not go to the server to get the new inod
604                              info */
605                         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
606                                 pCifsInode->clientCanCacheAll = true;
607                                 pCifsInode->clientCanCacheRead = true;
608                                 cFYI(1, ("Exclusive Oplock granted on inode %p",
609                                          file->f_path.dentry->d_inode));
610                         } else if ((oplock & 0xF) == OPLOCK_READ) {
611                                 pCifsInode->clientCanCacheRead = true;
612                                 pCifsInode->clientCanCacheAll = false;
613                         } else {
614                                 pCifsInode->clientCanCacheRead = false;
615                                 pCifsInode->clientCanCacheAll = false;
616                         }
617                         cifs_relock_file(pCifsFile);
618                 }
619         }
620         kfree(full_path);
621         FreeXid(xid);
622         return rc;
623 }
624
625 int cifs_close(struct inode *inode, struct file *file)
626 {
627         int rc = 0;
628         int xid, timeout;
629         struct cifs_sb_info *cifs_sb;
630         struct cifsTconInfo *pTcon;
631         struct cifsFileInfo *pSMBFile =
632                 (struct cifsFileInfo *)file->private_data;
633
634         xid = GetXid();
635
636         cifs_sb = CIFS_SB(inode->i_sb);
637         pTcon = cifs_sb->tcon;
638         if (pSMBFile) {
639                 struct cifsLockInfo *li, *tmp;
640                 write_lock(&GlobalSMBSeslock);
641                 pSMBFile->closePend = true;
642                 if (pTcon) {
643                         /* no sense reconnecting to close a file that is
644                            already closed */
645                         if (!pTcon->need_reconnect) {
646                                 write_unlock(&GlobalSMBSeslock);
647                                 timeout = 2;
648                                 while ((atomic_read(&pSMBFile->count) != 1)
649                                         && (timeout <= 2048)) {
650                                         /* Give write a better chance to get to
651                                         server ahead of the close.  We do not
652                                         want to add a wait_q here as it would
653                                         increase the memory utilization as
654                                         the struct would be in each open file,
655                                         but this should give enough time to
656                                         clear the socket */
657                                         cFYI(DBG2,
658                                                 ("close delay, write pending"));
659                                         msleep(timeout);
660                                         timeout *= 4;
661                                 }
662                                 if (!pTcon->need_reconnect &&
663                                     !pSMBFile->invalidHandle)
664                                         rc = CIFSSMBClose(xid, pTcon,
665                                                   pSMBFile->netfid);
666                         } else
667                                 write_unlock(&GlobalSMBSeslock);
668                 } else
669                         write_unlock(&GlobalSMBSeslock);
670
671                 /* Delete any outstanding lock records.
672                    We'll lose them when the file is closed anyway. */
673                 mutex_lock(&pSMBFile->lock_mutex);
674                 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
675                         list_del(&li->llist);
676                         kfree(li);
677                 }
678                 mutex_unlock(&pSMBFile->lock_mutex);
679
680                 write_lock(&GlobalSMBSeslock);
681                 list_del(&pSMBFile->flist);
682                 list_del(&pSMBFile->tlist);
683                 write_unlock(&GlobalSMBSeslock);
684                 cifsFileInfo_put(file->private_data);
685                 file->private_data = NULL;
686         } else
687                 rc = -EBADF;
688
689         read_lock(&GlobalSMBSeslock);
690         if (list_empty(&(CIFS_I(inode)->openFileList))) {
691                 cFYI(1, ("closing last open instance for inode %p", inode));
692                 /* if the file is not open we do not know if we can cache info
693                    on this inode, much less write behind and read ahead */
694                 CIFS_I(inode)->clientCanCacheRead = false;
695                 CIFS_I(inode)->clientCanCacheAll  = false;
696         }
697         read_unlock(&GlobalSMBSeslock);
698         if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
699                 rc = CIFS_I(inode)->write_behind_rc;
700         FreeXid(xid);
701         return rc;
702 }
703
704 int cifs_closedir(struct inode *inode, struct file *file)
705 {
706         int rc = 0;
707         int xid;
708         struct cifsFileInfo *pCFileStruct =
709             (struct cifsFileInfo *)file->private_data;
710         char *ptmp;
711
712         cFYI(1, ("Closedir inode = 0x%p", inode));
713
714         xid = GetXid();
715
716         if (pCFileStruct) {
717                 struct cifsTconInfo *pTcon;
718                 struct cifs_sb_info *cifs_sb =
719                         CIFS_SB(file->f_path.dentry->d_sb);
720
721                 pTcon = cifs_sb->tcon;
722
723                 cFYI(1, ("Freeing private data in close dir"));
724                 write_lock(&GlobalSMBSeslock);
725                 if (!pCFileStruct->srch_inf.endOfSearch &&
726                     !pCFileStruct->invalidHandle) {
727                         pCFileStruct->invalidHandle = true;
728                         write_unlock(&GlobalSMBSeslock);
729                         rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
730                         cFYI(1, ("Closing uncompleted readdir with rc %d",
731                                  rc));
732                         /* not much we can do if it fails anyway, ignore rc */
733                         rc = 0;
734                 } else
735                         write_unlock(&GlobalSMBSeslock);
736                 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
737                 if (ptmp) {
738                         cFYI(1, ("closedir free smb buf in srch struct"));
739                         pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
740                         if (pCFileStruct->srch_inf.smallBuf)
741                                 cifs_small_buf_release(ptmp);
742                         else
743                                 cifs_buf_release(ptmp);
744                 }
745                 kfree(file->private_data);
746                 file->private_data = NULL;
747         }
748         /* BB can we lock the filestruct while this is going on? */
749         FreeXid(xid);
750         return rc;
751 }
752
753 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
754                                 __u64 offset, __u8 lockType)
755 {
756         struct cifsLockInfo *li =
757                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
758         if (li == NULL)
759                 return -ENOMEM;
760         li->offset = offset;
761         li->length = len;
762         li->type = lockType;
763         mutex_lock(&fid->lock_mutex);
764         list_add(&li->llist, &fid->llist);
765         mutex_unlock(&fid->lock_mutex);
766         return 0;
767 }
768
769 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
770 {
771         int rc, xid;
772         __u32 numLock = 0;
773         __u32 numUnlock = 0;
774         __u64 length;
775         bool wait_flag = false;
776         struct cifs_sb_info *cifs_sb;
777         struct cifsTconInfo *tcon;
778         __u16 netfid;
779         __u8 lockType = LOCKING_ANDX_LARGE_FILES;
780         bool posix_locking = 0;
781
782         length = 1 + pfLock->fl_end - pfLock->fl_start;
783         rc = -EACCES;
784         xid = GetXid();
785
786         cFYI(1, ("Lock parm: 0x%x flockflags: "
787                  "0x%x flocktype: 0x%x start: %lld end: %lld",
788                 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
789                 pfLock->fl_end));
790
791         if (pfLock->fl_flags & FL_POSIX)
792                 cFYI(1, ("Posix"));
793         if (pfLock->fl_flags & FL_FLOCK)
794                 cFYI(1, ("Flock"));
795         if (pfLock->fl_flags & FL_SLEEP) {
796                 cFYI(1, ("Blocking lock"));
797                 wait_flag = true;
798         }
799         if (pfLock->fl_flags & FL_ACCESS)
800                 cFYI(1, ("Process suspended by mandatory locking - "
801                          "not implemented yet"));
802         if (pfLock->fl_flags & FL_LEASE)
803                 cFYI(1, ("Lease on file - not implemented yet"));
804         if (pfLock->fl_flags &
805             (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
806                 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
807
808         if (pfLock->fl_type == F_WRLCK) {
809                 cFYI(1, ("F_WRLCK "));
810                 numLock = 1;
811         } else if (pfLock->fl_type == F_UNLCK) {
812                 cFYI(1, ("F_UNLCK"));
813                 numUnlock = 1;
814                 /* Check if unlock includes more than
815                 one lock range */
816         } else if (pfLock->fl_type == F_RDLCK) {
817                 cFYI(1, ("F_RDLCK"));
818                 lockType |= LOCKING_ANDX_SHARED_LOCK;
819                 numLock = 1;
820         } else if (pfLock->fl_type == F_EXLCK) {
821                 cFYI(1, ("F_EXLCK"));
822                 numLock = 1;
823         } else if (pfLock->fl_type == F_SHLCK) {
824                 cFYI(1, ("F_SHLCK"));
825                 lockType |= LOCKING_ANDX_SHARED_LOCK;
826                 numLock = 1;
827         } else
828                 cFYI(1, ("Unknown type of lock"));
829
830         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
831         tcon = cifs_sb->tcon;
832
833         if (file->private_data == NULL) {
834                 rc = -EBADF;
835                 FreeXid(xid);
836                 return rc;
837         }
838         netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
839
840         if ((tcon->ses->capabilities & CAP_UNIX) &&
841             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
842             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
843                 posix_locking = 1;
844         /* BB add code here to normalize offset and length to
845         account for negative length which we can not accept over the
846         wire */
847         if (IS_GETLK(cmd)) {
848                 if (posix_locking) {
849                         int posix_lock_type;
850                         if (lockType & LOCKING_ANDX_SHARED_LOCK)
851                                 posix_lock_type = CIFS_RDLCK;
852                         else
853                                 posix_lock_type = CIFS_WRLCK;
854                         rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
855                                         length, pfLock,
856                                         posix_lock_type, wait_flag);
857                         FreeXid(xid);
858                         return rc;
859                 }
860
861                 /* BB we could chain these into one lock request BB */
862                 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
863                                  0, 1, lockType, 0 /* wait flag */ );
864                 if (rc == 0) {
865                         rc = CIFSSMBLock(xid, tcon, netfid, length,
866                                          pfLock->fl_start, 1 /* numUnlock */ ,
867                                          0 /* numLock */ , lockType,
868                                          0 /* wait flag */ );
869                         pfLock->fl_type = F_UNLCK;
870                         if (rc != 0)
871                                 cERROR(1, ("Error unlocking previously locked "
872                                            "range %d during test of lock", rc));
873                         rc = 0;
874
875                 } else {
876                         /* if rc == ERR_SHARING_VIOLATION ? */
877                         rc = 0; /* do not change lock type to unlock
878                                    since range in use */
879                 }
880
881                 FreeXid(xid);
882                 return rc;
883         }
884
885         if (!numLock && !numUnlock) {
886                 /* if no lock or unlock then nothing
887                 to do since we do not know what it is */
888                 FreeXid(xid);
889                 return -EOPNOTSUPP;
890         }
891
892         if (posix_locking) {
893                 int posix_lock_type;
894                 if (lockType & LOCKING_ANDX_SHARED_LOCK)
895                         posix_lock_type = CIFS_RDLCK;
896                 else
897                         posix_lock_type = CIFS_WRLCK;
898
899                 if (numUnlock == 1)
900                         posix_lock_type = CIFS_UNLCK;
901
902                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
903                                       length, pfLock,
904                                       posix_lock_type, wait_flag);
905         } else {
906                 struct cifsFileInfo *fid =
907                         (struct cifsFileInfo *)file->private_data;
908
909                 if (numLock) {
910                         rc = CIFSSMBLock(xid, tcon, netfid, length,
911                                         pfLock->fl_start,
912                                         0, numLock, lockType, wait_flag);
913
914                         if (rc == 0) {
915                                 /* For Windows locks we must store them. */
916                                 rc = store_file_lock(fid, length,
917                                                 pfLock->fl_start, lockType);
918                         }
919                 } else if (numUnlock) {
920                         /* For each stored lock that this unlock overlaps
921                            completely, unlock it. */
922                         int stored_rc = 0;
923                         struct cifsLockInfo *li, *tmp;
924
925                         rc = 0;
926                         mutex_lock(&fid->lock_mutex);
927                         list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
928                                 if (pfLock->fl_start <= li->offset &&
929                                                 (pfLock->fl_start + length) >=
930                                                 (li->offset + li->length)) {
931                                         stored_rc = CIFSSMBLock(xid, tcon,
932                                                         netfid,
933                                                         li->length, li->offset,
934                                                         1, 0, li->type, false);
935                                         if (stored_rc)
936                                                 rc = stored_rc;
937
938                                         list_del(&li->llist);
939                                         kfree(li);
940                                 }
941                         }
942                         mutex_unlock(&fid->lock_mutex);
943                 }
944         }
945
946         if (pfLock->fl_flags & FL_POSIX)
947                 posix_lock_file_wait(file, pfLock);
948         FreeXid(xid);
949         return rc;
950 }
951
952 /*
953  * Set the timeout on write requests past EOF. For some servers (Windows)
954  * these calls can be very long.
955  *
956  * If we're writing >10M past the EOF we give a 180s timeout. Anything less
957  * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
958  * The 10M cutoff is totally arbitrary. A better scheme for this would be
959  * welcome if someone wants to suggest one.
960  *
961  * We may be able to do a better job with this if there were some way to
962  * declare that a file should be sparse.
963  */
964 static int
965 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
966 {
967         if (offset <= cifsi->server_eof)
968                 return CIFS_STD_OP;
969         else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
970                 return CIFS_VLONG_OP;
971         else
972                 return CIFS_LONG_OP;
973 }
974
975 /* update the file size (if needed) after a write */
976 static void
977 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
978                       unsigned int bytes_written)
979 {
980         loff_t end_of_write = offset + bytes_written;
981
982         if (end_of_write > cifsi->server_eof)
983                 cifsi->server_eof = end_of_write;
984 }
985
986 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
987         size_t write_size, loff_t *poffset)
988 {
989         int rc = 0;
990         unsigned int bytes_written = 0;
991         unsigned int total_written;
992         struct cifs_sb_info *cifs_sb;
993         struct cifsTconInfo *pTcon;
994         int xid, long_op;
995         struct cifsFileInfo *open_file;
996         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
997
998         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
999
1000         pTcon = cifs_sb->tcon;
1001
1002         /* cFYI(1,
1003            (" write %d bytes to offset %lld of %s", write_size,
1004            *poffset, file->f_path.dentry->d_name.name)); */
1005
1006         if (file->private_data == NULL)
1007                 return -EBADF;
1008         open_file = (struct cifsFileInfo *) file->private_data;
1009
1010         rc = generic_write_checks(file, poffset, &write_size, 0);
1011         if (rc)
1012                 return rc;
1013
1014         xid = GetXid();
1015
1016         long_op = cifs_write_timeout(cifsi, *poffset);
1017         for (total_written = 0; write_size > total_written;
1018              total_written += bytes_written) {
1019                 rc = -EAGAIN;
1020                 while (rc == -EAGAIN) {
1021                         if (file->private_data == NULL) {
1022                                 /* file has been closed on us */
1023                                 FreeXid(xid);
1024                         /* if we have gotten here we have written some data
1025                            and blocked, and the file has been freed on us while
1026                            we blocked so return what we managed to write */
1027                                 return total_written;
1028                         }
1029                         if (open_file->closePend) {
1030                                 FreeXid(xid);
1031                                 if (total_written)
1032                                         return total_written;
1033                                 else
1034                                         return -EBADF;
1035                         }
1036                         if (open_file->invalidHandle) {
1037                                 /* we could deadlock if we called
1038                                    filemap_fdatawait from here so tell
1039                                    reopen_file not to flush data to server
1040                                    now */
1041                                 rc = cifs_reopen_file(file, false);
1042                                 if (rc != 0)
1043                                         break;
1044                         }
1045
1046                         rc = CIFSSMBWrite(xid, pTcon,
1047                                 open_file->netfid,
1048                                 min_t(const int, cifs_sb->wsize,
1049                                       write_size - total_written),
1050                                 *poffset, &bytes_written,
1051                                 NULL, write_data + total_written, long_op);
1052                 }
1053                 if (rc || (bytes_written == 0)) {
1054                         if (total_written)
1055                                 break;
1056                         else {
1057                                 FreeXid(xid);
1058                                 return rc;
1059                         }
1060                 } else {
1061                         cifs_update_eof(cifsi, *poffset, bytes_written);
1062                         *poffset += bytes_written;
1063                 }
1064                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1065                                     15 seconds is plenty */
1066         }
1067
1068         cifs_stats_bytes_written(pTcon, total_written);
1069
1070         /* since the write may have blocked check these pointers again */
1071         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1072                 struct inode *inode = file->f_path.dentry->d_inode;
1073 /* Do not update local mtime - server will set its actual value on write
1074  *              inode->i_ctime = inode->i_mtime =
1075  *                      current_fs_time(inode->i_sb);*/
1076                 if (total_written > 0) {
1077                         spin_lock(&inode->i_lock);
1078                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1079                                 i_size_write(file->f_path.dentry->d_inode,
1080                                         *poffset);
1081                         spin_unlock(&inode->i_lock);
1082                 }
1083                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1084         }
1085         FreeXid(xid);
1086         return total_written;
1087 }
1088
1089 static ssize_t cifs_write(struct file *file, const char *write_data,
1090                           size_t write_size, loff_t *poffset)
1091 {
1092         int rc = 0;
1093         unsigned int bytes_written = 0;
1094         unsigned int total_written;
1095         struct cifs_sb_info *cifs_sb;
1096         struct cifsTconInfo *pTcon;
1097         int xid, long_op;
1098         struct cifsFileInfo *open_file;
1099         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1100
1101         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1102
1103         pTcon = cifs_sb->tcon;
1104
1105         cFYI(1, ("write %zd bytes to offset %lld of %s", write_size,
1106            *poffset, file->f_path.dentry->d_name.name));
1107
1108         if (file->private_data == NULL)
1109                 return -EBADF;
1110         open_file = (struct cifsFileInfo *)file->private_data;
1111
1112         xid = GetXid();
1113
1114         long_op = cifs_write_timeout(cifsi, *poffset);
1115         for (total_written = 0; write_size > total_written;
1116              total_written += bytes_written) {
1117                 rc = -EAGAIN;
1118                 while (rc == -EAGAIN) {
1119                         if (file->private_data == NULL) {
1120                                 /* file has been closed on us */
1121                                 FreeXid(xid);
1122                         /* if we have gotten here we have written some data
1123                            and blocked, and the file has been freed on us
1124                            while we blocked so return what we managed to
1125                            write */
1126                                 return total_written;
1127                         }
1128                         if (open_file->closePend) {
1129                                 FreeXid(xid);
1130                                 if (total_written)
1131                                         return total_written;
1132                                 else
1133                                         return -EBADF;
1134                         }
1135                         if (open_file->invalidHandle) {
1136                                 /* we could deadlock if we called
1137                                    filemap_fdatawait from here so tell
1138                                    reopen_file not to flush data to
1139                                    server now */
1140                                 rc = cifs_reopen_file(file, false);
1141                                 if (rc != 0)
1142                                         break;
1143                         }
1144                         if (experimEnabled || (pTcon->ses->server &&
1145                                 ((pTcon->ses->server->secMode &
1146                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1147                                 == 0))) {
1148                                 struct kvec iov[2];
1149                                 unsigned int len;
1150
1151                                 len = min((size_t)cifs_sb->wsize,
1152                                           write_size - total_written);
1153                                 /* iov[0] is reserved for smb header */
1154                                 iov[1].iov_base = (char *)write_data +
1155                                                   total_written;
1156                                 iov[1].iov_len = len;
1157                                 rc = CIFSSMBWrite2(xid, pTcon,
1158                                                 open_file->netfid, len,
1159                                                 *poffset, &bytes_written,
1160                                                 iov, 1, long_op);
1161                         } else
1162                                 rc = CIFSSMBWrite(xid, pTcon,
1163                                          open_file->netfid,
1164                                          min_t(const int, cifs_sb->wsize,
1165                                                write_size - total_written),
1166                                          *poffset, &bytes_written,
1167                                          write_data + total_written,
1168                                          NULL, long_op);
1169                 }
1170                 if (rc || (bytes_written == 0)) {
1171                         if (total_written)
1172                                 break;
1173                         else {
1174                                 FreeXid(xid);
1175                                 return rc;
1176                         }
1177                 } else {
1178                         cifs_update_eof(cifsi, *poffset, bytes_written);
1179                         *poffset += bytes_written;
1180                 }
1181                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1182                                     15 seconds is plenty */
1183         }
1184
1185         cifs_stats_bytes_written(pTcon, total_written);
1186
1187         /* since the write may have blocked check these pointers again */
1188         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1189 /*BB We could make this contingent on superblock ATIME flag too */
1190 /*              file->f_path.dentry->d_inode->i_ctime =
1191                 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1192                 if (total_written > 0) {
1193                         spin_lock(&file->f_path.dentry->d_inode->i_lock);
1194                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1195                                 i_size_write(file->f_path.dentry->d_inode,
1196                                              *poffset);
1197                         spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1198                 }
1199                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1200         }
1201         FreeXid(xid);
1202         return total_written;
1203 }
1204
1205 #ifdef CONFIG_CIFS_EXPERIMENTAL
1206 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1207 {
1208         struct cifsFileInfo *open_file = NULL;
1209
1210         read_lock(&GlobalSMBSeslock);
1211         /* we could simply get the first_list_entry since write-only entries
1212            are always at the end of the list but since the first entry might
1213            have a close pending, we go through the whole list */
1214         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1215                 if (open_file->closePend)
1216                         continue;
1217                 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1218                     (open_file->pfile->f_flags & O_RDONLY))) {
1219                         if (!open_file->invalidHandle) {
1220                                 /* found a good file */
1221                                 /* lock it so it will not be closed on us */
1222                                 cifsFileInfo_get(open_file);
1223                                 read_unlock(&GlobalSMBSeslock);
1224                                 return open_file;
1225                         } /* else might as well continue, and look for
1226                              another, or simply have the caller reopen it
1227                              again rather than trying to fix this handle */
1228                 } else /* write only file */
1229                         break; /* write only files are last so must be done */
1230         }
1231         read_unlock(&GlobalSMBSeslock);
1232         return NULL;
1233 }
1234 #endif
1235
1236 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1237 {
1238         struct cifsFileInfo *open_file;
1239         bool any_available = false;
1240         int rc;
1241
1242         /* Having a null inode here (because mapping->host was set to zero by
1243         the VFS or MM) should not happen but we had reports of on oops (due to
1244         it being zero) during stress testcases so we need to check for it */
1245
1246         if (cifs_inode == NULL) {
1247                 cERROR(1, ("Null inode passed to cifs_writeable_file"));
1248                 dump_stack();
1249                 return NULL;
1250         }
1251
1252         read_lock(&GlobalSMBSeslock);
1253 refind_writable:
1254         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1255                 if (open_file->closePend ||
1256                     (!any_available && open_file->pid != current->tgid))
1257                         continue;
1258
1259                 if (open_file->pfile &&
1260                     ((open_file->pfile->f_flags & O_RDWR) ||
1261                      (open_file->pfile->f_flags & O_WRONLY))) {
1262                         cifsFileInfo_get(open_file);
1263
1264                         if (!open_file->invalidHandle) {
1265                                 /* found a good writable file */
1266                                 read_unlock(&GlobalSMBSeslock);
1267                                 return open_file;
1268                         }
1269
1270                         read_unlock(&GlobalSMBSeslock);
1271                         /* Had to unlock since following call can block */
1272                         rc = cifs_reopen_file(open_file->pfile, false);
1273                         if (!rc) {
1274                                 if (!open_file->closePend)
1275                                         return open_file;
1276                                 else { /* start over in case this was deleted */
1277                                        /* since the list could be modified */
1278                                         read_lock(&GlobalSMBSeslock);
1279                                         cifsFileInfo_put(open_file);
1280                                         goto refind_writable;
1281                                 }
1282                         }
1283
1284                         /* if it fails, try another handle if possible -
1285                         (we can not do this if closePending since
1286                         loop could be modified - in which case we
1287                         have to start at the beginning of the list
1288                         again. Note that it would be bad
1289                         to hold up writepages here (rather than
1290                         in caller) with continuous retries */
1291                         cFYI(1, ("wp failed on reopen file"));
1292                         read_lock(&GlobalSMBSeslock);
1293                         /* can not use this handle, no write
1294                            pending on this one after all */
1295                         cifsFileInfo_put(open_file);
1296
1297                         if (open_file->closePend) /* list could have changed */
1298                                 goto refind_writable;
1299                         /* else we simply continue to the next entry. Thus
1300                            we do not loop on reopen errors.  If we
1301                            can not reopen the file, for example if we
1302                            reconnected to a server with another client
1303                            racing to delete or lock the file we would not
1304                            make progress if we restarted before the beginning
1305                            of the loop here. */
1306                 }
1307         }
1308         /* couldn't find useable FH with same pid, try any available */
1309         if (!any_available) {
1310                 any_available = true;
1311                 goto refind_writable;
1312         }
1313         read_unlock(&GlobalSMBSeslock);
1314         return NULL;
1315 }
1316
1317 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1318 {
1319         struct address_space *mapping = page->mapping;
1320         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1321         char *write_data;
1322         int rc = -EFAULT;
1323         int bytes_written = 0;
1324         struct cifs_sb_info *cifs_sb;
1325         struct cifsTconInfo *pTcon;
1326         struct inode *inode;
1327         struct cifsFileInfo *open_file;
1328
1329         if (!mapping || !mapping->host)
1330                 return -EFAULT;
1331
1332         inode = page->mapping->host;
1333         cifs_sb = CIFS_SB(inode->i_sb);
1334         pTcon = cifs_sb->tcon;
1335
1336         offset += (loff_t)from;
1337         write_data = kmap(page);
1338         write_data += from;
1339
1340         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1341                 kunmap(page);
1342                 return -EIO;
1343         }
1344
1345         /* racing with truncate? */
1346         if (offset > mapping->host->i_size) {
1347                 kunmap(page);
1348                 return 0; /* don't care */
1349         }
1350
1351         /* check to make sure that we are not extending the file */
1352         if (mapping->host->i_size - offset < (loff_t)to)
1353                 to = (unsigned)(mapping->host->i_size - offset);
1354
1355         open_file = find_writable_file(CIFS_I(mapping->host));
1356         if (open_file) {
1357                 bytes_written = cifs_write(open_file->pfile, write_data,
1358                                            to-from, &offset);
1359                 cifsFileInfo_put(open_file);
1360                 /* Does mm or vfs already set times? */
1361                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1362                 if ((bytes_written > 0) && (offset))
1363                         rc = 0;
1364                 else if (bytes_written < 0)
1365                         rc = bytes_written;
1366         } else {
1367                 cFYI(1, ("No writeable filehandles for inode"));
1368                 rc = -EIO;
1369         }
1370
1371         kunmap(page);
1372         return rc;
1373 }
1374
1375 static int cifs_writepages(struct address_space *mapping,
1376                            struct writeback_control *wbc)
1377 {
1378         struct backing_dev_info *bdi = mapping->backing_dev_info;
1379         unsigned int bytes_to_write;
1380         unsigned int bytes_written;
1381         struct cifs_sb_info *cifs_sb;
1382         int done = 0;
1383         pgoff_t end;
1384         pgoff_t index;
1385         int range_whole = 0;
1386         struct kvec *iov;
1387         int len;
1388         int n_iov = 0;
1389         pgoff_t next;
1390         int nr_pages;
1391         __u64 offset = 0;
1392         struct cifsFileInfo *open_file;
1393         struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1394         struct page *page;
1395         struct pagevec pvec;
1396         int rc = 0;
1397         int scanned = 0;
1398         int xid, long_op;
1399
1400         cifs_sb = CIFS_SB(mapping->host->i_sb);
1401
1402         /*
1403          * If wsize is smaller that the page cache size, default to writing
1404          * one page at a time via cifs_writepage
1405          */
1406         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1407                 return generic_writepages(mapping, wbc);
1408
1409         if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1410                 if (cifs_sb->tcon->ses->server->secMode &
1411                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1412                         if (!experimEnabled)
1413                                 return generic_writepages(mapping, wbc);
1414
1415         iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1416         if (iov == NULL)
1417                 return generic_writepages(mapping, wbc);
1418
1419
1420         /*
1421          * BB: Is this meaningful for a non-block-device file system?
1422          * If it is, we should test it again after we do I/O
1423          */
1424         if (wbc->nonblocking && bdi_write_congested(bdi)) {
1425                 wbc->encountered_congestion = 1;
1426                 kfree(iov);
1427                 return 0;
1428         }
1429
1430         xid = GetXid();
1431
1432         pagevec_init(&pvec, 0);
1433         if (wbc->range_cyclic) {
1434                 index = mapping->writeback_index; /* Start from prev offset */
1435                 end = -1;
1436         } else {
1437                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1438                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1439                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1440                         range_whole = 1;
1441                 scanned = 1;
1442         }
1443 retry:
1444         while (!done && (index <= end) &&
1445                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1446                         PAGECACHE_TAG_DIRTY,
1447                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1448                 int first;
1449                 unsigned int i;
1450
1451                 first = -1;
1452                 next = 0;
1453                 n_iov = 0;
1454                 bytes_to_write = 0;
1455
1456                 for (i = 0; i < nr_pages; i++) {
1457                         page = pvec.pages[i];
1458                         /*
1459                          * At this point we hold neither mapping->tree_lock nor
1460                          * lock on the page itself: the page may be truncated or
1461                          * invalidated (changing page->mapping to NULL), or even
1462                          * swizzled back from swapper_space to tmpfs file
1463                          * mapping
1464                          */
1465
1466                         if (first < 0)
1467                                 lock_page(page);
1468                         else if (!trylock_page(page))
1469                                 break;
1470
1471                         if (unlikely(page->mapping != mapping)) {
1472                                 unlock_page(page);
1473                                 break;
1474                         }
1475
1476                         if (!wbc->range_cyclic && page->index > end) {
1477                                 done = 1;
1478                                 unlock_page(page);
1479                                 break;
1480                         }
1481
1482                         if (next && (page->index != next)) {
1483                                 /* Not next consecutive page */
1484                                 unlock_page(page);
1485                                 break;
1486                         }
1487
1488                         if (wbc->sync_mode != WB_SYNC_NONE)
1489                                 wait_on_page_writeback(page);
1490
1491                         if (PageWriteback(page) ||
1492                                         !clear_page_dirty_for_io(page)) {
1493                                 unlock_page(page);
1494                                 break;
1495                         }
1496
1497                         /*
1498                          * This actually clears the dirty bit in the radix tree.
1499                          * See cifs_writepage() for more commentary.
1500                          */
1501                         set_page_writeback(page);
1502
1503                         if (page_offset(page) >= mapping->host->i_size) {
1504                                 done = 1;
1505                                 unlock_page(page);
1506                                 end_page_writeback(page);
1507                                 break;
1508                         }
1509
1510                         /*
1511                          * BB can we get rid of this?  pages are held by pvec
1512                          */
1513                         page_cache_get(page);
1514
1515                         len = min(mapping->host->i_size - page_offset(page),
1516                                   (loff_t)PAGE_CACHE_SIZE);
1517
1518                         /* reserve iov[0] for the smb header */
1519                         n_iov++;
1520                         iov[n_iov].iov_base = kmap(page);
1521                         iov[n_iov].iov_len = len;
1522                         bytes_to_write += len;
1523
1524                         if (first < 0) {
1525                                 first = i;
1526                                 offset = page_offset(page);
1527                         }
1528                         next = page->index + 1;
1529                         if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1530                                 break;
1531                 }
1532                 if (n_iov) {
1533                         /* Search for a writable handle every time we call
1534                          * CIFSSMBWrite2.  We can't rely on the last handle
1535                          * we used to still be valid
1536                          */
1537                         open_file = find_writable_file(CIFS_I(mapping->host));
1538                         if (!open_file) {
1539                                 cERROR(1, ("No writable handles for inode"));
1540                                 rc = -EBADF;
1541                         } else {
1542                                 long_op = cifs_write_timeout(cifsi, offset);
1543                                 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1544                                                    open_file->netfid,
1545                                                    bytes_to_write, offset,
1546                                                    &bytes_written, iov, n_iov,
1547                                                    long_op);
1548                                 cifsFileInfo_put(open_file);
1549                                 cifs_update_eof(cifsi, offset, bytes_written);
1550
1551                                 if (rc || bytes_written < bytes_to_write) {
1552                                         cERROR(1, ("Write2 ret %d, wrote %d",
1553                                                   rc, bytes_written));
1554                                         /* BB what if continued retry is
1555                                            requested via mount flags? */
1556                                         if (rc == -ENOSPC)
1557                                                 set_bit(AS_ENOSPC, &mapping->flags);
1558                                         else
1559                                                 set_bit(AS_EIO, &mapping->flags);
1560                                 } else {
1561                                         cifs_stats_bytes_written(cifs_sb->tcon,
1562                                                                  bytes_written);
1563                                 }
1564                         }
1565                         for (i = 0; i < n_iov; i++) {
1566                                 page = pvec.pages[first + i];
1567                                 /* Should we also set page error on
1568                                 success rc but too little data written? */
1569                                 /* BB investigate retry logic on temporary
1570                                 server crash cases and how recovery works
1571                                 when page marked as error */
1572                                 if (rc)
1573                                         SetPageError(page);
1574                                 kunmap(page);
1575                                 unlock_page(page);
1576                                 end_page_writeback(page);
1577                                 page_cache_release(page);
1578                         }
1579                         if ((wbc->nr_to_write -= n_iov) <= 0)
1580                                 done = 1;
1581                         index = next;
1582                 } else
1583                         /* Need to re-find the pages we skipped */
1584                         index = pvec.pages[0]->index + 1;
1585
1586                 pagevec_release(&pvec);
1587         }
1588         if (!scanned && !done) {
1589                 /*
1590                  * We hit the last page and there is more work to be done: wrap
1591                  * back to the start of the file
1592                  */
1593                 scanned = 1;
1594                 index = 0;
1595                 goto retry;
1596         }
1597         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1598                 mapping->writeback_index = index;
1599
1600         FreeXid(xid);
1601         kfree(iov);
1602         return rc;
1603 }
1604
1605 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1606 {
1607         int rc = -EFAULT;
1608         int xid;
1609
1610         xid = GetXid();
1611 /* BB add check for wbc flags */
1612         page_cache_get(page);
1613         if (!PageUptodate(page))
1614                 cFYI(1, ("ppw - page not up to date"));
1615
1616         /*
1617          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1618          *
1619          * A writepage() implementation always needs to do either this,
1620          * or re-dirty the page with "redirty_page_for_writepage()" in
1621          * the case of a failure.
1622          *
1623          * Just unlocking the page will cause the radix tree tag-bits
1624          * to fail to update with the state of the page correctly.
1625          */
1626         set_page_writeback(page);
1627         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1628         SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1629         unlock_page(page);
1630         end_page_writeback(page);
1631         page_cache_release(page);
1632         FreeXid(xid);
1633         return rc;
1634 }
1635
1636 static int cifs_write_end(struct file *file, struct address_space *mapping,
1637                         loff_t pos, unsigned len, unsigned copied,
1638                         struct page *page, void *fsdata)
1639 {
1640         int rc;
1641         struct inode *inode = mapping->host;
1642
1643         cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1644                  page, pos, copied));
1645
1646         if (PageChecked(page)) {
1647                 if (copied == len)
1648                         SetPageUptodate(page);
1649                 ClearPageChecked(page);
1650         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1651                 SetPageUptodate(page);
1652
1653         if (!PageUptodate(page)) {
1654                 char *page_data;
1655                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1656                 int xid;
1657
1658                 xid = GetXid();
1659                 /* this is probably better than directly calling
1660                    partialpage_write since in this function the file handle is
1661                    known which we might as well leverage */
1662                 /* BB check if anything else missing out of ppw
1663                    such as updating last write time */
1664                 page_data = kmap(page);
1665                 rc = cifs_write(file, page_data + offset, copied, &pos);
1666                 /* if (rc < 0) should we set writebehind rc? */
1667                 kunmap(page);
1668
1669                 FreeXid(xid);
1670         } else {
1671                 rc = copied;
1672                 pos += copied;
1673                 set_page_dirty(page);
1674         }
1675
1676         if (rc > 0) {
1677                 spin_lock(&inode->i_lock);
1678                 if (pos > inode->i_size)
1679                         i_size_write(inode, pos);
1680                 spin_unlock(&inode->i_lock);
1681         }
1682
1683         unlock_page(page);
1684         page_cache_release(page);
1685
1686         return rc;
1687 }
1688
1689 int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1690 {
1691         int xid;
1692         int rc = 0;
1693         struct cifsTconInfo *tcon;
1694         struct cifsFileInfo *smbfile =
1695                 (struct cifsFileInfo *)file->private_data;
1696         struct inode *inode = file->f_path.dentry->d_inode;
1697
1698         xid = GetXid();
1699
1700         cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1701                 dentry->d_name.name, datasync));
1702
1703         rc = filemap_write_and_wait(inode->i_mapping);
1704         if (rc == 0) {
1705                 rc = CIFS_I(inode)->write_behind_rc;
1706                 CIFS_I(inode)->write_behind_rc = 0;
1707                 tcon = CIFS_SB(inode->i_sb)->tcon;
1708                 if (!rc && tcon && smbfile &&
1709                    !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1710                         rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1711         }
1712
1713         FreeXid(xid);
1714         return rc;
1715 }
1716
1717 /* static void cifs_sync_page(struct page *page)
1718 {
1719         struct address_space *mapping;
1720         struct inode *inode;
1721         unsigned long index = page->index;
1722         unsigned int rpages = 0;
1723         int rc = 0;
1724
1725         cFYI(1, ("sync page %p",page));
1726         mapping = page->mapping;
1727         if (!mapping)
1728                 return 0;
1729         inode = mapping->host;
1730         if (!inode)
1731                 return; */
1732
1733 /*      fill in rpages then
1734         result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1735
1736 /*      cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1737
1738 #if 0
1739         if (rc < 0)
1740                 return rc;
1741         return 0;
1742 #endif
1743 } */
1744
1745 /*
1746  * As file closes, flush all cached write data for this inode checking
1747  * for write behind errors.
1748  */
1749 int cifs_flush(struct file *file, fl_owner_t id)
1750 {
1751         struct inode *inode = file->f_path.dentry->d_inode;
1752         int rc = 0;
1753
1754         /* Rather than do the steps manually:
1755            lock the inode for writing
1756            loop through pages looking for write behind data (dirty pages)
1757            coalesce into contiguous 16K (or smaller) chunks to write to server
1758            send to server (prefer in parallel)
1759            deal with writebehind errors
1760            unlock inode for writing
1761            filemapfdatawrite appears easier for the time being */
1762
1763         rc = filemap_fdatawrite(inode->i_mapping);
1764         /* reset wb rc if we were able to write out dirty pages */
1765         if (!rc) {
1766                 rc = CIFS_I(inode)->write_behind_rc;
1767                 CIFS_I(inode)->write_behind_rc = 0;
1768         }
1769
1770         cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
1771
1772         return rc;
1773 }
1774
1775 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1776         size_t read_size, loff_t *poffset)
1777 {
1778         int rc = -EACCES;
1779         unsigned int bytes_read = 0;
1780         unsigned int total_read = 0;
1781         unsigned int current_read_size;
1782         struct cifs_sb_info *cifs_sb;
1783         struct cifsTconInfo *pTcon;
1784         int xid;
1785         struct cifsFileInfo *open_file;
1786         char *smb_read_data;
1787         char __user *current_offset;
1788         struct smb_com_read_rsp *pSMBr;
1789
1790         xid = GetXid();
1791         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1792         pTcon = cifs_sb->tcon;
1793
1794         if (file->private_data == NULL) {
1795                 rc = -EBADF;
1796                 FreeXid(xid);
1797                 return rc;
1798         }
1799         open_file = (struct cifsFileInfo *)file->private_data;
1800
1801         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1802                 cFYI(1, ("attempting read on write only file instance"));
1803
1804         for (total_read = 0, current_offset = read_data;
1805              read_size > total_read;
1806              total_read += bytes_read, current_offset += bytes_read) {
1807                 current_read_size = min_t(const int, read_size - total_read,
1808                                           cifs_sb->rsize);
1809                 rc = -EAGAIN;
1810                 smb_read_data = NULL;
1811                 while (rc == -EAGAIN) {
1812                         int buf_type = CIFS_NO_BUFFER;
1813                         if ((open_file->invalidHandle) &&
1814                             (!open_file->closePend)) {
1815                                 rc = cifs_reopen_file(file, true);
1816                                 if (rc != 0)
1817                                         break;
1818                         }
1819                         rc = CIFSSMBRead(xid, pTcon,
1820                                          open_file->netfid,
1821                                          current_read_size, *poffset,
1822                                          &bytes_read, &smb_read_data,
1823                                          &buf_type);
1824                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1825                         if (smb_read_data) {
1826                                 if (copy_to_user(current_offset,
1827                                                 smb_read_data +
1828                                                 4 /* RFC1001 length field */ +
1829                                                 le16_to_cpu(pSMBr->DataOffset),
1830                                                 bytes_read))
1831                                         rc = -EFAULT;
1832
1833                                 if (buf_type == CIFS_SMALL_BUFFER)
1834                                         cifs_small_buf_release(smb_read_data);
1835                                 else if (buf_type == CIFS_LARGE_BUFFER)
1836                                         cifs_buf_release(smb_read_data);
1837                                 smb_read_data = NULL;
1838                         }
1839                 }
1840                 if (rc || (bytes_read == 0)) {
1841                         if (total_read) {
1842                                 break;
1843                         } else {
1844                                 FreeXid(xid);
1845                                 return rc;
1846                         }
1847                 } else {
1848                         cifs_stats_bytes_read(pTcon, bytes_read);
1849                         *poffset += bytes_read;
1850                 }
1851         }
1852         FreeXid(xid);
1853         return total_read;
1854 }
1855
1856
1857 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1858         loff_t *poffset)
1859 {
1860         int rc = -EACCES;
1861         unsigned int bytes_read = 0;
1862         unsigned int total_read;
1863         unsigned int current_read_size;
1864         struct cifs_sb_info *cifs_sb;
1865         struct cifsTconInfo *pTcon;
1866         int xid;
1867         char *current_offset;
1868         struct cifsFileInfo *open_file;
1869         int buf_type = CIFS_NO_BUFFER;
1870
1871         xid = GetXid();
1872         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1873         pTcon = cifs_sb->tcon;
1874
1875         if (file->private_data == NULL) {
1876                 rc = -EBADF;
1877                 FreeXid(xid);
1878                 return rc;
1879         }
1880         open_file = (struct cifsFileInfo *)file->private_data;
1881
1882         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1883                 cFYI(1, ("attempting read on write only file instance"));
1884
1885         for (total_read = 0, current_offset = read_data;
1886              read_size > total_read;
1887              total_read += bytes_read, current_offset += bytes_read) {
1888                 current_read_size = min_t(const int, read_size - total_read,
1889                                           cifs_sb->rsize);
1890                 /* For windows me and 9x we do not want to request more
1891                 than it negotiated since it will refuse the read then */
1892                 if ((pTcon->ses) &&
1893                         !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1894                         current_read_size = min_t(const int, current_read_size,
1895                                         pTcon->ses->server->maxBuf - 128);
1896                 }
1897                 rc = -EAGAIN;
1898                 while (rc == -EAGAIN) {
1899                         if ((open_file->invalidHandle) &&
1900                             (!open_file->closePend)) {
1901                                 rc = cifs_reopen_file(file, true);
1902                                 if (rc != 0)
1903                                         break;
1904                         }
1905                         rc = CIFSSMBRead(xid, pTcon,
1906                                          open_file->netfid,
1907                                          current_read_size, *poffset,
1908                                          &bytes_read, &current_offset,
1909                                          &buf_type);
1910                 }
1911                 if (rc || (bytes_read == 0)) {
1912                         if (total_read) {
1913                                 break;
1914                         } else {
1915                                 FreeXid(xid);
1916                                 return rc;
1917                         }
1918                 } else {
1919                         cifs_stats_bytes_read(pTcon, total_read);
1920                         *poffset += bytes_read;
1921                 }
1922         }
1923         FreeXid(xid);
1924         return total_read;
1925 }
1926
1927 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1928 {
1929         struct dentry *dentry = file->f_path.dentry;
1930         int rc, xid;
1931
1932         xid = GetXid();
1933         rc = cifs_revalidate(dentry);
1934         if (rc) {
1935                 cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
1936                 FreeXid(xid);
1937                 return rc;
1938         }
1939         rc = generic_file_mmap(file, vma);
1940         FreeXid(xid);
1941         return rc;
1942 }
1943
1944
1945 static void cifs_copy_cache_pages(struct address_space *mapping,
1946         struct list_head *pages, int bytes_read, char *data,
1947         struct pagevec *plru_pvec)
1948 {
1949         struct page *page;
1950         char *target;
1951
1952         while (bytes_read > 0) {
1953                 if (list_empty(pages))
1954                         break;
1955
1956                 page = list_entry(pages->prev, struct page, lru);
1957                 list_del(&page->lru);
1958
1959                 if (add_to_page_cache(page, mapping, page->index,
1960                                       GFP_KERNEL)) {
1961                         page_cache_release(page);
1962                         cFYI(1, ("Add page cache failed"));
1963                         data += PAGE_CACHE_SIZE;
1964                         bytes_read -= PAGE_CACHE_SIZE;
1965                         continue;
1966                 }
1967
1968                 target = kmap_atomic(page, KM_USER0);
1969
1970                 if (PAGE_CACHE_SIZE > bytes_read) {
1971                         memcpy(target, data, bytes_read);
1972                         /* zero the tail end of this partial page */
1973                         memset(target + bytes_read, 0,
1974                                PAGE_CACHE_SIZE - bytes_read);
1975                         bytes_read = 0;
1976                 } else {
1977                         memcpy(target, data, PAGE_CACHE_SIZE);
1978                         bytes_read -= PAGE_CACHE_SIZE;
1979                 }
1980                 kunmap_atomic(target, KM_USER0);
1981
1982                 flush_dcache_page(page);
1983                 SetPageUptodate(page);
1984                 unlock_page(page);
1985                 if (!pagevec_add(plru_pvec, page))
1986                         __pagevec_lru_add_file(plru_pvec);
1987                 data += PAGE_CACHE_SIZE;
1988         }
1989         return;
1990 }
1991
1992 static int cifs_readpages(struct file *file, struct address_space *mapping,
1993         struct list_head *page_list, unsigned num_pages)
1994 {
1995         int rc = -EACCES;
1996         int xid;
1997         loff_t offset;
1998         struct page *page;
1999         struct cifs_sb_info *cifs_sb;
2000         struct cifsTconInfo *pTcon;
2001         unsigned int bytes_read = 0;
2002         unsigned int read_size, i;
2003         char *smb_read_data = NULL;
2004         struct smb_com_read_rsp *pSMBr;
2005         struct pagevec lru_pvec;
2006         struct cifsFileInfo *open_file;
2007         int buf_type = CIFS_NO_BUFFER;
2008
2009         xid = GetXid();
2010         if (file->private_data == NULL) {
2011                 rc = -EBADF;
2012                 FreeXid(xid);
2013                 return rc;
2014         }
2015         open_file = (struct cifsFileInfo *)file->private_data;
2016         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2017         pTcon = cifs_sb->tcon;
2018
2019         pagevec_init(&lru_pvec, 0);
2020         cFYI(DBG2, ("rpages: num pages %d", num_pages));
2021         for (i = 0; i < num_pages; ) {
2022                 unsigned contig_pages;
2023                 struct page *tmp_page;
2024                 unsigned long expected_index;
2025
2026                 if (list_empty(page_list))
2027                         break;
2028
2029                 page = list_entry(page_list->prev, struct page, lru);
2030                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2031
2032                 /* count adjacent pages that we will read into */
2033                 contig_pages = 0;
2034                 expected_index =
2035                         list_entry(page_list->prev, struct page, lru)->index;
2036                 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2037                         if (tmp_page->index == expected_index) {
2038                                 contig_pages++;
2039                                 expected_index++;
2040                         } else
2041                                 break;
2042                 }
2043                 if (contig_pages + i >  num_pages)
2044                         contig_pages = num_pages - i;
2045
2046                 /* for reads over a certain size could initiate async
2047                    read ahead */
2048
2049                 read_size = contig_pages * PAGE_CACHE_SIZE;
2050                 /* Read size needs to be in multiples of one page */
2051                 read_size = min_t(const unsigned int, read_size,
2052                                   cifs_sb->rsize & PAGE_CACHE_MASK);
2053                 cFYI(DBG2, ("rpages: read size 0x%x  contiguous pages %d",
2054                                 read_size, contig_pages));
2055                 rc = -EAGAIN;
2056                 while (rc == -EAGAIN) {
2057                         if ((open_file->invalidHandle) &&
2058                             (!open_file->closePend)) {
2059                                 rc = cifs_reopen_file(file, true);
2060                                 if (rc != 0)
2061                                         break;
2062                         }
2063
2064                         rc = CIFSSMBRead(xid, pTcon,
2065                                          open_file->netfid,
2066                                          read_size, offset,
2067                                          &bytes_read, &smb_read_data,
2068                                          &buf_type);
2069                         /* BB more RC checks ? */
2070                         if (rc == -EAGAIN) {
2071                                 if (smb_read_data) {
2072                                         if (buf_type == CIFS_SMALL_BUFFER)
2073                                                 cifs_small_buf_release(smb_read_data);
2074                                         else if (buf_type == CIFS_LARGE_BUFFER)
2075                                                 cifs_buf_release(smb_read_data);
2076                                         smb_read_data = NULL;
2077                                 }
2078                         }
2079                 }
2080                 if ((rc < 0) || (smb_read_data == NULL)) {
2081                         cFYI(1, ("Read error in readpages: %d", rc));
2082                         break;
2083                 } else if (bytes_read > 0) {
2084                         task_io_account_read(bytes_read);
2085                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2086                         cifs_copy_cache_pages(mapping, page_list, bytes_read,
2087                                 smb_read_data + 4 /* RFC1001 hdr */ +
2088                                 le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
2089
2090                         i +=  bytes_read >> PAGE_CACHE_SHIFT;
2091                         cifs_stats_bytes_read(pTcon, bytes_read);
2092                         if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2093                                 i++; /* account for partial page */
2094
2095                                 /* server copy of file can have smaller size
2096                                    than client */
2097                                 /* BB do we need to verify this common case ?
2098                                    this case is ok - if we are at server EOF
2099                                    we will hit it on next read */
2100
2101                                 /* break; */
2102                         }
2103                 } else {
2104                         cFYI(1, ("No bytes read (%d) at offset %lld . "
2105                                  "Cleaning remaining pages from readahead list",
2106                                  bytes_read, offset));
2107                         /* BB turn off caching and do new lookup on
2108                            file size at server? */
2109                         break;
2110                 }
2111                 if (smb_read_data) {
2112                         if (buf_type == CIFS_SMALL_BUFFER)
2113                                 cifs_small_buf_release(smb_read_data);
2114                         else if (buf_type == CIFS_LARGE_BUFFER)
2115                                 cifs_buf_release(smb_read_data);
2116                         smb_read_data = NULL;
2117                 }
2118                 bytes_read = 0;
2119         }
2120
2121         pagevec_lru_add_file(&lru_pvec);
2122
2123 /* need to free smb_read_data buf before exit */
2124         if (smb_read_data) {
2125                 if (buf_type == CIFS_SMALL_BUFFER)
2126                         cifs_small_buf_release(smb_read_data);
2127                 else if (buf_type == CIFS_LARGE_BUFFER)
2128                         cifs_buf_release(smb_read_data);
2129                 smb_read_data = NULL;
2130         }
2131
2132         FreeXid(xid);
2133         return rc;
2134 }
2135
2136 static int cifs_readpage_worker(struct file *file, struct page *page,
2137         loff_t *poffset)
2138 {
2139         char *read_data;
2140         int rc;
2141
2142         page_cache_get(page);
2143         read_data = kmap(page);
2144         /* for reads over a certain size could initiate async read ahead */
2145
2146         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2147
2148         if (rc < 0)
2149                 goto io_error;
2150         else
2151                 cFYI(1, ("Bytes read %d", rc));
2152
2153         file->f_path.dentry->d_inode->i_atime =
2154                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2155
2156         if (PAGE_CACHE_SIZE > rc)
2157                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2158
2159         flush_dcache_page(page);
2160         SetPageUptodate(page);
2161         rc = 0;
2162
2163 io_error:
2164         kunmap(page);
2165         page_cache_release(page);
2166         return rc;
2167 }
2168
2169 static int cifs_readpage(struct file *file, struct page *page)
2170 {
2171         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2172         int rc = -EACCES;
2173         int xid;
2174
2175         xid = GetXid();
2176
2177         if (file->private_data == NULL) {
2178                 rc = -EBADF;
2179                 FreeXid(xid);
2180                 return rc;
2181         }
2182
2183         cFYI(1, ("readpage %p at offset %d 0x%x\n",
2184                  page, (int)offset, (int)offset));
2185
2186         rc = cifs_readpage_worker(file, page, &offset);
2187
2188         unlock_page(page);
2189
2190         FreeXid(xid);
2191         return rc;
2192 }
2193
2194 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2195 {
2196         struct cifsFileInfo *open_file;
2197
2198         read_lock(&GlobalSMBSeslock);
2199         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2200                 if (open_file->closePend)
2201                         continue;
2202                 if (open_file->pfile &&
2203                     ((open_file->pfile->f_flags & O_RDWR) ||
2204                      (open_file->pfile->f_flags & O_WRONLY))) {
2205                         read_unlock(&GlobalSMBSeslock);
2206                         return 1;
2207                 }
2208         }
2209         read_unlock(&GlobalSMBSeslock);
2210         return 0;
2211 }
2212
2213 /* We do not want to update the file size from server for inodes
2214    open for write - to avoid races with writepage extending
2215    the file - in the future we could consider allowing
2216    refreshing the inode only on increases in the file size
2217    but this is tricky to do without racing with writebehind
2218    page caching in the current Linux kernel design */
2219 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2220 {
2221         if (!cifsInode)
2222                 return true;
2223
2224         if (is_inode_writable(cifsInode)) {
2225                 /* This inode is open for write at least once */
2226                 struct cifs_sb_info *cifs_sb;
2227
2228                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2229                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2230                         /* since no page cache to corrupt on directio
2231                         we can change size safely */
2232                         return true;
2233                 }
2234
2235                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2236                         return true;
2237
2238                 return false;
2239         } else
2240                 return true;
2241 }
2242
2243 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2244                         loff_t pos, unsigned len, unsigned flags,
2245                         struct page **pagep, void **fsdata)
2246 {
2247         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2248         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2249         loff_t page_start = pos & PAGE_MASK;
2250         loff_t i_size;
2251         struct page *page;
2252         int rc = 0;
2253
2254         cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2255
2256         page = grab_cache_page_write_begin(mapping, index, flags);
2257         if (!page) {
2258                 rc = -ENOMEM;
2259                 goto out;
2260         }
2261
2262         if (PageUptodate(page))
2263                 goto out;
2264
2265         /*
2266          * If we write a full page it will be up to date, no need to read from
2267          * the server. If the write is short, we'll end up doing a sync write
2268          * instead.
2269          */
2270         if (len == PAGE_CACHE_SIZE)
2271                 goto out;
2272
2273         /*
2274          * optimize away the read when we have an oplock, and we're not
2275          * expecting to use any of the data we'd be reading in. That
2276          * is, when the page lies beyond the EOF, or straddles the EOF
2277          * and the write will cover all of the existing data.
2278          */
2279         if (CIFS_I(mapping->host)->clientCanCacheRead) {
2280                 i_size = i_size_read(mapping->host);
2281                 if (page_start >= i_size ||
2282                     (offset == 0 && (pos + len) >= i_size)) {
2283                         zero_user_segments(page, 0, offset,
2284                                            offset + len,
2285                                            PAGE_CACHE_SIZE);
2286                         /*
2287                          * PageChecked means that the parts of the page
2288                          * to which we're not writing are considered up
2289                          * to date. Once the data is copied to the
2290                          * page, it can be set uptodate.
2291                          */
2292                         SetPageChecked(page);
2293                         goto out;
2294                 }
2295         }
2296
2297         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2298                 /*
2299                  * might as well read a page, it is fast enough. If we get
2300                  * an error, we don't need to return it. cifs_write_end will
2301                  * do a sync write instead since PG_uptodate isn't set.
2302                  */
2303                 cifs_readpage_worker(file, page, &page_start);
2304         } else {
2305                 /* we could try using another file handle if there is one -
2306                    but how would we lock it to prevent close of that handle
2307                    racing with this read? In any case
2308                    this will be written out by write_end so is fine */
2309         }
2310 out:
2311         *pagep = page;
2312         return rc;
2313 }
2314
2315 const struct address_space_operations cifs_addr_ops = {
2316         .readpage = cifs_readpage,
2317         .readpages = cifs_readpages,
2318         .writepage = cifs_writepage,
2319         .writepages = cifs_writepages,
2320         .write_begin = cifs_write_begin,
2321         .write_end = cifs_write_end,
2322         .set_page_dirty = __set_page_dirty_nobuffers,
2323         /* .sync_page = cifs_sync_page, */
2324         /* .direct_IO = */
2325 };
2326
2327 /*
2328  * cifs_readpages requires the server to support a buffer large enough to
2329  * contain the header plus one complete page of data.  Otherwise, we need
2330  * to leave cifs_readpages out of the address space operations.
2331  */
2332 const struct address_space_operations cifs_addr_ops_smallbuf = {
2333         .readpage = cifs_readpage,
2334         .writepage = cifs_writepage,
2335         .writepages = cifs_writepages,
2336         .write_begin = cifs_write_begin,
2337         .write_end = cifs_write_end,
2338         .set_page_dirty = __set_page_dirty_nobuffers,
2339         /* .sync_page = cifs_sync_page, */
2340         /* .direct_IO = */
2341 };