[CIFS] work around bug in Samba server handling for posix open
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2007
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <asm/div64.h>
34 #include "cifsfs.h"
35 #include "cifspdu.h"
36 #include "cifsglob.h"
37 #include "cifsproto.h"
38 #include "cifs_unicode.h"
39 #include "cifs_debug.h"
40 #include "cifs_fs_sb.h"
41
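/* Initialize the per-open cifsFileInfo: record the server file handle
   (netfid) and the opening task's tgid, set up the handle semaphore and
   the byte range lock list, and link the structure back to the file and
   inode so that writepage and oplock break processing can find it later */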
42 static inline struct cifsFileInfo *cifs_init_private(
43         struct cifsFileInfo *private_data, struct inode *inode,
44         struct file *file, __u16 netfid)
45 {
46         memset(private_data, 0, sizeof(struct cifsFileInfo));
47         private_data->netfid = netfid;
48         private_data->pid = current->tgid;
49         init_MUTEX(&private_data->fh_sem);
50         mutex_init(&private_data->lock_mutex);
51         INIT_LIST_HEAD(&private_data->llist);
52         private_data->pfile = file; /* needed for writepage */
53         private_data->pInode = inode;
54         private_data->invalidHandle = false;
55         private_data->closePend = false;
56         /* we have to track num writers to the inode, since writepages
57         does not tell us which handle the write is for so there can
58         be a close (overlapping with write) of the filehandle that
59         cifs_writepages chose to use */
60         atomic_set(&private_data->wrtPending, 0);
61
62         return private_data;
63 }
64
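/* Convert the access mode bits of the VFS open flags into the NT/CIFS
   desired access mask requested in the SMB open */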
65 static inline int cifs_convert_flags(unsigned int flags)
66 {
67         if ((flags & O_ACCMODE) == O_RDONLY)
68                 return GENERIC_READ;
69         else if ((flags & O_ACCMODE) == O_WRONLY)
70                 return GENERIC_WRITE;
71         else if ((flags & O_ACCMODE) == O_RDWR) {
72                 /* GENERIC_ALL is too much permission to request
73                    can cause unnecessary access denied on create */
74                 /* return GENERIC_ALL; */
75                 return (GENERIC_READ | GENERIC_WRITE);
76         }
77
78         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
79                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
80                 FILE_READ_DATA);
81 }
82
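/* Convert VFS open flags into the flag word sent to the server on a
   posix open (CIFS Unix Extensions).  Only the access mode and a few
   flags such as O_APPEND, O_SYNC and O_DIRECT are translated here */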
83 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
84 {
85         fmode_t posix_flags = 0;
86
87         if ((flags & O_ACCMODE) == O_RDONLY)
88                 posix_flags = FMODE_READ;
89         else if ((flags & O_ACCMODE) == O_WRONLY)
90                 posix_flags = FMODE_WRITE;
91         else if ((flags & O_ACCMODE) == O_RDWR) {
 92                 /* there is no direct equivalent of O_RDWR in the posix
 93                    open flags, so simply request both read and write
 94                    access */
95                 posix_flags = FMODE_READ | FMODE_WRITE;
96         }
97         /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
98            reopening a file.  They had their effect on the original open */
99         if (flags & O_APPEND)
100                 posix_flags |= (fmode_t)O_APPEND;
101         if (flags & O_SYNC)
102                 posix_flags |= (fmode_t)O_SYNC;
103         if (flags & O_DIRECTORY)
104                 posix_flags |= (fmode_t)O_DIRECTORY;
105         if (flags & O_NOFOLLOW)
106                 posix_flags |= (fmode_t)O_NOFOLLOW;
107         if (flags & O_DIRECT)
108                 posix_flags |= (fmode_t)O_DIRECT;
109
110         return posix_flags;
111 }
112
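/* Map the O_CREAT, O_EXCL and O_TRUNC open flag combinations to the
   corresponding CIFS create disposition (see the mapping table in
   cifs_open below) */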
113 static inline int cifs_get_disposition(unsigned int flags)
114 {
115         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
116                 return FILE_CREATE;
117         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
118                 return FILE_OVERWRITE_IF;
119         else if ((flags & O_CREAT) == O_CREAT)
120                 return FILE_OPEN_IF;
121         else if ((flags & O_TRUNC) == O_TRUNC)
122                 return FILE_OVERWRITE;
123         else
124                 return FILE_OPEN;
125 }
126
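/* Finish setting up a file that was opened via posix open: allocate and
   initialize the cifsFileInfo hung off file->private_data, add it to the
   tcon and inode open file lists, and record the granted oplock level in
   the client caching flags */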
127 /* all arguments to this function must be checked for validity in caller */
128 static inline int cifs_posix_open_inode_helper(struct inode *inode,
129                         struct file *file, struct cifsInodeInfo *pCifsInode,
130                         struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
131 {
132         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
133 /*      struct timespec temp; */   /* BB REMOVEME BB */
134
135         file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
136         if (file->private_data == NULL)
137                 return -ENOMEM;
138         pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
139         write_lock(&GlobalSMBSeslock);
140         list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
141
142         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
143         if (pCifsInode == NULL) {
144                 write_unlock(&GlobalSMBSeslock);
145                 return -EINVAL;
146         }
147
148         /* want handles we can use to read with first
149            in the list so we do not have to walk the
150            list to search for one in write_begin */
151         if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
152                 list_add_tail(&pCifsFile->flist,
153                               &pCifsInode->openFileList);
154         } else {
155                 list_add(&pCifsFile->flist,
156                          &pCifsInode->openFileList);
157         }
158
159         if (pCifsInode->clientCanCacheRead) {
160                 /* we have the inode open somewhere else
161                    no need to discard cache data */
162                 goto psx_client_can_cache;
163         }
164
165         /* BB FIXME need to fix this check to move it earlier into posix_open
 166            BB fix following section BB FIXME */
167
168         /* if not oplocked, invalidate inode pages if mtime or file
169            size changed */
170 /*      temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
171         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
172                            (file->f_path.dentry->d_inode->i_size ==
173                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
174                 cFYI(1, ("inode unchanged on server"));
175         } else {
176                 if (file->f_path.dentry->d_inode->i_mapping) {
177                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
178                         if (rc != 0)
179                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
180                 }
181                 cFYI(1, ("invalidating remote inode since open detected it "
182                          "changed"));
183                 invalidate_remote_inode(file->f_path.dentry->d_inode);
184         } */
185
186 psx_client_can_cache:
187         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
188                 pCifsInode->clientCanCacheAll = true;
189                 pCifsInode->clientCanCacheRead = true;
190                 cFYI(1, ("Exclusive Oplock granted on inode %p",
191                          file->f_path.dentry->d_inode));
192         } else if ((oplock & 0xF) == OPLOCK_READ)
193                 pCifsInode->clientCanCacheRead = true;
194
195         /* will have to change the unlock if we reenable the
 196            filemap_fdatawrite (which does not seem necessary) */
197         write_unlock(&GlobalSMBSeslock);
198         return 0;
199 }
200
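/* Finish setting up a file opened the traditional (non-posix) way: add
   the handle to the inode open file list, invalidate cached pages if the
   file changed on the server while we held no oplock, refresh the inode
   from the open response, and record the granted oplock level */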
201 /* all arguments to this function must be checked for validity in caller */
202 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
203         struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
204         struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
205         char *full_path, int xid)
206 {
207         struct timespec temp;
208         int rc;
209
210         /* want handles we can use to read with first
211            in the list so we do not have to walk the
212            list to search for one in write_begin */
213         if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
214                 list_add_tail(&pCifsFile->flist,
215                               &pCifsInode->openFileList);
216         } else {
217                 list_add(&pCifsFile->flist,
218                          &pCifsInode->openFileList);
219         }
220         write_unlock(&GlobalSMBSeslock);
221         if (pCifsInode->clientCanCacheRead) {
222                 /* we have the inode open somewhere else
223                    no need to discard cache data */
224                 goto client_can_cache;
225         }
226
227         /* BB need same check in cifs_create too? */
228         /* if not oplocked, invalidate inode pages if mtime or file
229            size changed */
230         temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
231         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
232                            (file->f_path.dentry->d_inode->i_size ==
233                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
234                 cFYI(1, ("inode unchanged on server"));
235         } else {
236                 if (file->f_path.dentry->d_inode->i_mapping) {
237                 /* BB no need to lock inode until after invalidate
238                    since namei code should already have it locked? */
239                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
240                         if (rc != 0)
241                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
242                 }
243                 cFYI(1, ("invalidating remote inode since open detected it "
244                          "changed"));
245                 invalidate_remote_inode(file->f_path.dentry->d_inode);
246         }
247
248 client_can_cache:
249         if (pTcon->unix_ext)
250                 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
251                         full_path, inode->i_sb, xid);
252         else
253                 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
254                         full_path, buf, inode->i_sb, xid, NULL);
255
256         if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
257                 pCifsInode->clientCanCacheAll = true;
258                 pCifsInode->clientCanCacheRead = true;
259                 cFYI(1, ("Exclusive Oplock granted on inode %p",
260                          file->f_path.dentry->d_inode));
261         } else if ((*oplock & 0xF) == OPLOCK_READ)
262                 pCifsInode->clientCanCacheRead = true;
263
264         return rc;
265 }
266
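/* Open a file on the server.  If the share supports the CIFS Unix
   Extensions, and posix open has not been marked broken for this tcon,
   try the posix open path first and fall back to the traditional NT or
   legacy SMB open when it fails */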
267 int cifs_open(struct inode *inode, struct file *file)
268 {
269         int rc = -EACCES;
270         int xid, oplock;
271         struct cifs_sb_info *cifs_sb;
272         struct cifsTconInfo *tcon;
273         struct cifsFileInfo *pCifsFile;
274         struct cifsInodeInfo *pCifsInode;
275         struct list_head *tmp;
276         char *full_path = NULL;
277         int desiredAccess;
278         int disposition;
279         __u16 netfid;
280         FILE_ALL_INFO *buf = NULL;
281
282         xid = GetXid();
283
284         cifs_sb = CIFS_SB(inode->i_sb);
285         tcon = cifs_sb->tcon;
286
287         if (file->f_flags & O_CREAT) {
288                 /* search inode for this file and fill in file->private_data */
289                 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
290                 read_lock(&GlobalSMBSeslock);
291                 list_for_each(tmp, &pCifsInode->openFileList) {
292                         pCifsFile = list_entry(tmp, struct cifsFileInfo,
293                                                flist);
294                         if ((pCifsFile->pfile == NULL) &&
295                             (pCifsFile->pid == current->tgid)) {
296                                 /* mode set in cifs_create */
297
298                                 /* needed for writepage */
299                                 pCifsFile->pfile = file;
300
301                                 file->private_data = pCifsFile;
302                                 break;
303                         }
304                 }
305                 read_unlock(&GlobalSMBSeslock);
306                 if (file->private_data != NULL) {
307                         rc = 0;
308                         FreeXid(xid);
309                         return rc;
310                 } else {
311                         if (file->f_flags & O_EXCL)
312                                 cERROR(1, ("could not find file instance for "
313                                            "new file %p", file));
314                 }
315         }
316
317         full_path = build_path_from_dentry(file->f_path.dentry);
318         if (full_path == NULL) {
319                 FreeXid(xid);
320                 return -ENOMEM;
321         }
322
323         cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
324                  inode, file->f_flags, full_path));
325
326         if (oplockEnabled)
327                 oplock = REQ_OPLOCK;
328         else
329                 oplock = 0;
330
331         if (!tcon->broken_posix_open && tcon->unix_ext &&
332             (tcon->ses->capabilities & CAP_UNIX) &&
333             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
334                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
335                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
336                 /* can not refresh inode info since size could be stale */
337                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
338                                      cifs_sb->mnt_file_mode /* ignored */,
339                                      oflags, &oplock, &netfid, xid);
340                 if (rc == 0) {
341                         cFYI(1, ("posix open succeeded"));
 342                         /* no need for special case handling of setting mode
 343                            on read only files here */
344
345                         cifs_posix_open_inode_helper(inode, file, pCifsInode,
346                                                      pCifsFile, oplock, netfid);
347                         goto out;
348                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
349                         if (tcon->ses->serverNOS)
350                                 cERROR(1, ("server %s of type %s returned"
351                                            " unexpected error on SMB posix open"
352                                            ", disabling posix open support."
353                                            " Check if server update available.",
354                                            tcon->ses->serverName,
355                                            tcon->ses->serverNOS));
356                         tcon->broken_posix_open = true;
357                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
358                          (rc != -EOPNOTSUPP)) /* path not found or net err */
359                         goto out;
360                 /* else fallthrough to retry open the old way on network i/o
361                    or DFS errors */
362         }
363
364         desiredAccess = cifs_convert_flags(file->f_flags);
365
366 /*********************************************************************
367  *  open flag mapping table:
368  *
369  *      POSIX Flag            CIFS Disposition
370  *      ----------            ----------------
371  *      O_CREAT               FILE_OPEN_IF
372  *      O_CREAT | O_EXCL      FILE_CREATE
373  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
374  *      O_TRUNC               FILE_OVERWRITE
375  *      none of the above     FILE_OPEN
376  *
 377  *      Note that there is no direct match for the FILE_SUPERSEDE
 378  *      disposition (ie create whether or not the file exists);
 379  *      O_CREAT | O_TRUNC is similar but truncates the existing
 380  *      file rather than creating a new file as FILE_SUPERSEDE does
 381  *      (which uses the attributes / metadata passed in on the open call).
 382  *
 383  *      O_SYNC is a reasonable match to the CIFS writethrough flag
 384  *      and the read/write flags match reasonably.  O_LARGEFILE
 385  *      is irrelevant because largefile support is always used
 386  *      by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 387  *      O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
388  *********************************************************************/
389
390         disposition = cifs_get_disposition(file->f_flags);
391
392         /* BB pass O_SYNC flag through on file attributes .. BB */
393
394         /* Also refresh inode by passing in file_info buf returned by SMBOpen
395            and calling get_inode_info with returned buf (at least helps
396            non-Unix server case) */
397
398         /* BB we can not do this if this is the second open of a file
399            and the first handle has writebehind data, we might be
400            able to simply do a filemap_fdatawrite/filemap_fdatawait first */
401         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
402         if (!buf) {
403                 rc = -ENOMEM;
404                 goto out;
405         }
406
407         if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
408                 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
409                          desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
410                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
411                                  & CIFS_MOUNT_MAP_SPECIAL_CHR);
412         else
 413                 rc = -EIO; /* no NT SMB support, fall into legacy open below */
414
415         if (rc == -EIO) {
416                 /* Old server, try legacy style OpenX */
417                 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
418                         desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
419                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
420                                 & CIFS_MOUNT_MAP_SPECIAL_CHR);
421         }
422         if (rc) {
423                 cFYI(1, ("cifs_open returned 0x%x", rc));
424                 goto out;
425         }
426         file->private_data =
427                 kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
428         if (file->private_data == NULL) {
429                 rc = -ENOMEM;
430                 goto out;
431         }
432         pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
433         write_lock(&GlobalSMBSeslock);
434         list_add(&pCifsFile->tlist, &tcon->openFileList);
435
436         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
437         if (pCifsInode) {
438                 rc = cifs_open_inode_helper(inode, file, pCifsInode,
439                                             pCifsFile, tcon,
440                                             &oplock, buf, full_path, xid);
441         } else {
442                 write_unlock(&GlobalSMBSeslock);
443         }
444
445         if (oplock & CIFS_CREATE_ACTION) {
446                 /* time to set mode which we can not set earlier due to
447                    problems creating new read-only files */
448                 if (tcon->unix_ext) {
449                         struct cifs_unix_set_info_args args = {
450                                 .mode   = inode->i_mode,
451                                 .uid    = NO_CHANGE_64,
452                                 .gid    = NO_CHANGE_64,
453                                 .ctime  = NO_CHANGE_64,
454                                 .atime  = NO_CHANGE_64,
455                                 .mtime  = NO_CHANGE_64,
456                                 .device = 0,
457                         };
458                         CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
459                                             cifs_sb->local_nls,
460                                             cifs_sb->mnt_cifs_flags &
461                                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
462                 }
463         }
464
465 out:
466         kfree(buf);
467         kfree(full_path);
468         FreeXid(xid);
469         return rc;
470 }
471
472 /* Try to reacquire byte range locks that were released when session */
473 /* to server was lost */
474 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
475 {
476         int rc = 0;
477
478 /* BB list all locks open on this file and relock */
479
480         return rc;
481 }
482
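/* Reopen a file whose handle has been invalidated, typically after the
   session to the server was lost and reconnected.  can_flush indicates
   whether it is safe to flush dirty pages and refetch inode info, or
   whether doing so could deadlock with writeback already in progress */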
483 static int cifs_reopen_file(struct file *file, bool can_flush)
484 {
485         int rc = -EACCES;
486         int xid, oplock;
487         struct cifs_sb_info *cifs_sb;
488         struct cifsTconInfo *tcon;
489         struct cifsFileInfo *pCifsFile;
490         struct cifsInodeInfo *pCifsInode;
491         struct inode *inode;
492         char *full_path = NULL;
493         int desiredAccess;
494         int disposition = FILE_OPEN;
495         __u16 netfid;
496
497         if (file->private_data)
498                 pCifsFile = (struct cifsFileInfo *)file->private_data;
499         else
500                 return -EBADF;
501
502         xid = GetXid();
503         down(&pCifsFile->fh_sem);
504         if (!pCifsFile->invalidHandle) {
505                 up(&pCifsFile->fh_sem);
506                 FreeXid(xid);
507                 return 0;
508         }
509
510         if (file->f_path.dentry == NULL) {
511                 cERROR(1, ("no valid name if dentry freed"));
512                 dump_stack();
513                 rc = -EBADF;
514                 goto reopen_error_exit;
515         }
516
517         inode = file->f_path.dentry->d_inode;
518         if (inode == NULL) {
519                 cERROR(1, ("inode not valid"));
520                 dump_stack();
521                 rc = -EBADF;
522                 goto reopen_error_exit;
523         }
524
525         cifs_sb = CIFS_SB(inode->i_sb);
526         tcon = cifs_sb->tcon;
527
528 /* can not grab rename sem here because various ops, including
529    those that already have the rename sem can end up causing writepage
530    to get called and if the server was down that means we end up here,
531    and we can never tell if the caller already has the rename_sem */
532         full_path = build_path_from_dentry(file->f_path.dentry);
533         if (full_path == NULL) {
534                 rc = -ENOMEM;
535 reopen_error_exit:
536                 up(&pCifsFile->fh_sem);
537                 FreeXid(xid);
538                 return rc;
539         }
540
541         cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
542                  inode, file->f_flags, full_path));
543
544         if (oplockEnabled)
545                 oplock = REQ_OPLOCK;
546         else
547                 oplock = 0;
548
549         if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
550             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
551                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
552                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
553                 /* can not refresh inode info since size could be stale */
554                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
555                                      cifs_sb->mnt_file_mode /* ignored */,
556                                      oflags, &oplock, &netfid, xid);
557                 if (rc == 0) {
558                         cFYI(1, ("posix reopen succeeded"));
559                         goto reopen_success;
560                 }
561                 /* fallthrough to retry open the old way on errors, especially
562                    in the reconnect path it is important to retry hard */
563         }
564
565         desiredAccess = cifs_convert_flags(file->f_flags);
566
567         /* Can not refresh inode by passing in file_info buf to be returned
568            by SMBOpen and then calling get_inode_info with returned buf
569            since file might have write behind data that needs to be flushed
570            and server version of file size can be stale. If we knew for sure
571            that inode was not dirty locally we could do this */
572
573         rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
574                          CREATE_NOT_DIR, &netfid, &oplock, NULL,
575                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
576                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
577         if (rc) {
578                 up(&pCifsFile->fh_sem);
579                 cFYI(1, ("cifs_open returned 0x%x", rc));
580                 cFYI(1, ("oplock: %d", oplock));
581         } else {
582 reopen_success:
583                 pCifsFile->netfid = netfid;
584                 pCifsFile->invalidHandle = false;
585                 up(&pCifsFile->fh_sem);
586                 pCifsInode = CIFS_I(inode);
587                 if (pCifsInode) {
588                         if (can_flush) {
589                                 rc = filemap_write_and_wait(inode->i_mapping);
590                                 if (rc != 0)
591                                         CIFS_I(inode)->write_behind_rc = rc;
592                         /* temporarily disable caching while we
593                            go to server to get inode info */
594                                 pCifsInode->clientCanCacheAll = false;
595                                 pCifsInode->clientCanCacheRead = false;
596                                 if (tcon->unix_ext)
597                                         rc = cifs_get_inode_info_unix(&inode,
598                                                 full_path, inode->i_sb, xid);
599                                 else
600                                         rc = cifs_get_inode_info(&inode,
601                                                 full_path, NULL, inode->i_sb,
602                                                 xid, NULL);
603                         } /* else we are writing out data to server already
604                              and could deadlock if we tried to flush data, and
605                              since we do not know if we have data that would
606                              invalidate the current end of file on the server
 607                              we can not go to the server to get the new inode
608                              info */
609                         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
610                                 pCifsInode->clientCanCacheAll = true;
611                                 pCifsInode->clientCanCacheRead = true;
612                                 cFYI(1, ("Exclusive Oplock granted on inode %p",
613                                          file->f_path.dentry->d_inode));
614                         } else if ((oplock & 0xF) == OPLOCK_READ) {
615                                 pCifsInode->clientCanCacheRead = true;
616                                 pCifsInode->clientCanCacheAll = false;
617                         } else {
618                                 pCifsInode->clientCanCacheRead = false;
619                                 pCifsInode->clientCanCacheAll = false;
620                         }
621                         cifs_relock_file(pCifsFile);
622                 }
623         }
624         kfree(full_path);
625         FreeXid(xid);
626         return rc;
627 }
628
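/* Close a file handle: mark it close pending, wait briefly for writes
   still in flight to reach the wire, send the SMB close (unless the tcon
   needs reconnect or the handle is already invalid), free any cached byte
   range locks, and remove the handle from the open file lists */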
629 int cifs_close(struct inode *inode, struct file *file)
630 {
631         int rc = 0;
632         int xid, timeout;
633         struct cifs_sb_info *cifs_sb;
634         struct cifsTconInfo *pTcon;
635         struct cifsFileInfo *pSMBFile =
636                 (struct cifsFileInfo *)file->private_data;
637
638         xid = GetXid();
639
640         cifs_sb = CIFS_SB(inode->i_sb);
641         pTcon = cifs_sb->tcon;
642         if (pSMBFile) {
643                 struct cifsLockInfo *li, *tmp;
644                 write_lock(&GlobalSMBSeslock);
645                 pSMBFile->closePend = true;
646                 if (pTcon) {
647                         /* no sense reconnecting to close a file that is
648                            already closed */
649                         if (!pTcon->need_reconnect) {
650                                 write_unlock(&GlobalSMBSeslock);
651                                 timeout = 2;
652                                 while ((atomic_read(&pSMBFile->wrtPending) != 0)
653                                         && (timeout <= 2048)) {
654                                         /* Give write a better chance to get to
655                                         server ahead of the close.  We do not
656                                         want to add a wait_q here as it would
657                                         increase the memory utilization as
658                                         the struct would be in each open file,
659                                         but this should give enough time to
660                                         clear the socket */
661                                         cFYI(DBG2,
662                                                 ("close delay, write pending"));
663                                         msleep(timeout);
664                                         timeout *= 4;
665                                 }
666                                 if (atomic_read(&pSMBFile->wrtPending))
667                                         cERROR(1, ("close with pending write"));
668                                 if (!pTcon->need_reconnect &&
669                                     !pSMBFile->invalidHandle)
670                                         rc = CIFSSMBClose(xid, pTcon,
671                                                   pSMBFile->netfid);
672                         } else
673                                 write_unlock(&GlobalSMBSeslock);
674                 } else
675                         write_unlock(&GlobalSMBSeslock);
676
677                 /* Delete any outstanding lock records.
678                    We'll lose them when the file is closed anyway. */
679                 mutex_lock(&pSMBFile->lock_mutex);
680                 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
681                         list_del(&li->llist);
682                         kfree(li);
683                 }
684                 mutex_unlock(&pSMBFile->lock_mutex);
685
686                 write_lock(&GlobalSMBSeslock);
687                 list_del(&pSMBFile->flist);
688                 list_del(&pSMBFile->tlist);
689                 write_unlock(&GlobalSMBSeslock);
690                 timeout = 10;
 691                 /* We waited above to give the SMBWrite a chance to issue
 692                    on the wire (so we do not get SMBWrite returning EBADF
 693                    if writepages is racing with close).  Note that writepages
 694                    does not specify a file handle, so it is possible for a file
 695                    to be opened twice, and the application to close the "wrong"
 696                    file handle - in these cases we delay long enough to allow
 697                    the SMBWrite to get on the wire before the SMB Close.
 698                    We allow a total wait here of over 45 seconds, more than the
 699                    oplock break time, and more than enough to allow any write
 700                    to complete on the server, or to time out on the client */
701                 while ((atomic_read(&pSMBFile->wrtPending) != 0)
702                                 && (timeout <= 50000)) {
703                         cERROR(1, ("writes pending, delay free of handle"));
704                         msleep(timeout);
705                         timeout *= 8;
706                 }
707                 kfree(file->private_data);
708                 file->private_data = NULL;
709         } else
710                 rc = -EBADF;
711
712         read_lock(&GlobalSMBSeslock);
713         if (list_empty(&(CIFS_I(inode)->openFileList))) {
714                 cFYI(1, ("closing last open instance for inode %p", inode));
715                 /* if the file is not open we do not know if we can cache info
716                    on this inode, much less write behind and read ahead */
717                 CIFS_I(inode)->clientCanCacheRead = false;
718                 CIFS_I(inode)->clientCanCacheAll  = false;
719         }
720         read_unlock(&GlobalSMBSeslock);
721         if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
722                 rc = CIFS_I(inode)->write_behind_rc;
723         FreeXid(xid);
724         return rc;
725 }
726
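/* Close a directory opened for readdir: close any still-open server
   search handle and release the network buffer cached in the search
   state before freeing the private data */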
727 int cifs_closedir(struct inode *inode, struct file *file)
728 {
729         int rc = 0;
730         int xid;
731         struct cifsFileInfo *pCFileStruct =
732             (struct cifsFileInfo *)file->private_data;
733         char *ptmp;
734
735         cFYI(1, ("Closedir inode = 0x%p", inode));
736
737         xid = GetXid();
738
739         if (pCFileStruct) {
740                 struct cifsTconInfo *pTcon;
741                 struct cifs_sb_info *cifs_sb =
742                         CIFS_SB(file->f_path.dentry->d_sb);
743
744                 pTcon = cifs_sb->tcon;
745
746                 cFYI(1, ("Freeing private data in close dir"));
747                 write_lock(&GlobalSMBSeslock);
748                 if (!pCFileStruct->srch_inf.endOfSearch &&
749                     !pCFileStruct->invalidHandle) {
750                         pCFileStruct->invalidHandle = true;
751                         write_unlock(&GlobalSMBSeslock);
752                         rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
753                         cFYI(1, ("Closing uncompleted readdir with rc %d",
754                                  rc));
755                         /* not much we can do if it fails anyway, ignore rc */
756                         rc = 0;
757                 } else
758                         write_unlock(&GlobalSMBSeslock);
759                 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
760                 if (ptmp) {
761                         cFYI(1, ("closedir free smb buf in srch struct"));
762                         pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
763                         if (pCFileStruct->srch_inf.smallBuf)
764                                 cifs_small_buf_release(ptmp);
765                         else
766                                 cifs_buf_release(ptmp);
767                 }
768                 kfree(file->private_data);
769                 file->private_data = NULL;
770         }
771         /* BB can we lock the filestruct while this is going on? */
772         FreeXid(xid);
773         return rc;
774 }
775
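/* Remember a byte range lock granted by the server so that later unlock
   requests and the final close can find and release it */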
776 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
777                                 __u64 offset, __u8 lockType)
778 {
779         struct cifsLockInfo *li =
780                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
781         if (li == NULL)
782                 return -ENOMEM;
783         li->offset = offset;
784         li->length = len;
785         li->type = lockType;
786         mutex_lock(&fid->lock_mutex);
787         list_add(&li->llist, &fid->llist);
788         mutex_unlock(&fid->lock_mutex);
789         return 0;
790 }
791
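/* Handle fcntl byte range lock requests.  If the server supports the
   CIFS Unix Extensions fcntl lock capability (and posix brl support was
   not disabled at mount time) posix locks are used; otherwise the request
   is emulated with Windows style LockingAndX calls and granted locks are
   tracked locally so they can be released later */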
792 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
793 {
794         int rc, xid;
795         __u32 numLock = 0;
796         __u32 numUnlock = 0;
797         __u64 length;
798         bool wait_flag = false;
799         struct cifs_sb_info *cifs_sb;
800         struct cifsTconInfo *tcon;
801         __u16 netfid;
802         __u8 lockType = LOCKING_ANDX_LARGE_FILES;
 803         bool posix_locking = false;
804
805         length = 1 + pfLock->fl_end - pfLock->fl_start;
806         rc = -EACCES;
807         xid = GetXid();
808
809         cFYI(1, ("Lock parm: 0x%x flockflags: "
810                  "0x%x flocktype: 0x%x start: %lld end: %lld",
811                 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
812                 pfLock->fl_end));
813
814         if (pfLock->fl_flags & FL_POSIX)
815                 cFYI(1, ("Posix"));
816         if (pfLock->fl_flags & FL_FLOCK)
817                 cFYI(1, ("Flock"));
818         if (pfLock->fl_flags & FL_SLEEP) {
819                 cFYI(1, ("Blocking lock"));
820                 wait_flag = true;
821         }
822         if (pfLock->fl_flags & FL_ACCESS)
823                 cFYI(1, ("Process suspended by mandatory locking - "
824                          "not implemented yet"));
825         if (pfLock->fl_flags & FL_LEASE)
826                 cFYI(1, ("Lease on file - not implemented yet"));
827         if (pfLock->fl_flags &
828             (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
829                 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
830
831         if (pfLock->fl_type == F_WRLCK) {
832                 cFYI(1, ("F_WRLCK "));
833                 numLock = 1;
834         } else if (pfLock->fl_type == F_UNLCK) {
835                 cFYI(1, ("F_UNLCK"));
836                 numUnlock = 1;
837                 /* Check if unlock includes more than
838                 one lock range */
839         } else if (pfLock->fl_type == F_RDLCK) {
840                 cFYI(1, ("F_RDLCK"));
841                 lockType |= LOCKING_ANDX_SHARED_LOCK;
842                 numLock = 1;
843         } else if (pfLock->fl_type == F_EXLCK) {
844                 cFYI(1, ("F_EXLCK"));
845                 numLock = 1;
846         } else if (pfLock->fl_type == F_SHLCK) {
847                 cFYI(1, ("F_SHLCK"));
848                 lockType |= LOCKING_ANDX_SHARED_LOCK;
849                 numLock = 1;
850         } else
851                 cFYI(1, ("Unknown type of lock"));
852
853         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
854         tcon = cifs_sb->tcon;
855
856         if (file->private_data == NULL) {
857                 FreeXid(xid);
858                 return -EBADF;
859         }
860         netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
861
862         if ((tcon->ses->capabilities & CAP_UNIX) &&
863             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
864             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
865                 posix_locking = 1;
866         /* BB add code here to normalize offset and length to
867         account for negative length which we can not accept over the
868         wire */
869         if (IS_GETLK(cmd)) {
870                 if (posix_locking) {
871                         int posix_lock_type;
872                         if (lockType & LOCKING_ANDX_SHARED_LOCK)
873                                 posix_lock_type = CIFS_RDLCK;
874                         else
875                                 posix_lock_type = CIFS_WRLCK;
876                         rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
877                                         length, pfLock,
878                                         posix_lock_type, wait_flag);
879                         FreeXid(xid);
880                         return rc;
881                 }
882
883                 /* BB we could chain these into one lock request BB */
884                 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
885                                  0, 1, lockType, 0 /* wait flag */ );
886                 if (rc == 0) {
887                         rc = CIFSSMBLock(xid, tcon, netfid, length,
888                                          pfLock->fl_start, 1 /* numUnlock */ ,
889                                          0 /* numLock */ , lockType,
890                                          0 /* wait flag */ );
891                         pfLock->fl_type = F_UNLCK;
892                         if (rc != 0)
 893                                 cERROR(1, ("Error %d unlocking previously "
 894                                            "locked range during test of lock", rc));
895                         rc = 0;
896
897                 } else {
898                         /* if rc == ERR_SHARING_VIOLATION ? */
899                         rc = 0; /* do not change lock type to unlock
900                                    since range in use */
901                 }
902
903                 FreeXid(xid);
904                 return rc;
905         }
906
907         if (!numLock && !numUnlock) {
908                 /* if no lock or unlock then nothing
909                 to do since we do not know what it is */
910                 FreeXid(xid);
911                 return -EOPNOTSUPP;
912         }
913
914         if (posix_locking) {
915                 int posix_lock_type;
916                 if (lockType & LOCKING_ANDX_SHARED_LOCK)
917                         posix_lock_type = CIFS_RDLCK;
918                 else
919                         posix_lock_type = CIFS_WRLCK;
920
921                 if (numUnlock == 1)
922                         posix_lock_type = CIFS_UNLCK;
923
924                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
925                                       length, pfLock,
926                                       posix_lock_type, wait_flag);
927         } else {
928                 struct cifsFileInfo *fid =
929                         (struct cifsFileInfo *)file->private_data;
930
931                 if (numLock) {
932                         rc = CIFSSMBLock(xid, tcon, netfid, length,
933                                         pfLock->fl_start,
934                                         0, numLock, lockType, wait_flag);
935
936                         if (rc == 0) {
937                                 /* For Windows locks we must store them. */
938                                 rc = store_file_lock(fid, length,
939                                                 pfLock->fl_start, lockType);
940                         }
941                 } else if (numUnlock) {
942                         /* For each stored lock that this unlock overlaps
943                            completely, unlock it. */
944                         int stored_rc = 0;
945                         struct cifsLockInfo *li, *tmp;
946
947                         rc = 0;
948                         mutex_lock(&fid->lock_mutex);
949                         list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
950                                 if (pfLock->fl_start <= li->offset &&
951                                                 (pfLock->fl_start + length) >=
952                                                 (li->offset + li->length)) {
953                                         stored_rc = CIFSSMBLock(xid, tcon,
954                                                         netfid,
955                                                         li->length, li->offset,
956                                                         1, 0, li->type, false);
957                                         if (stored_rc)
958                                                 rc = stored_rc;
959
960                                         list_del(&li->llist);
961                                         kfree(li);
962                                 }
963                         }
964                         mutex_unlock(&fid->lock_mutex);
965                 }
966         }
967
968         if (pfLock->fl_flags & FL_POSIX)
969                 posix_lock_file_wait(file, pfLock);
970         FreeXid(xid);
971         return rc;
972 }
973
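/* Write data from a user space buffer to the server, looping until the
   whole request has been sent (reopening an invalidated handle if needed)
   and updating the cached file size if the write extended the file */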
974 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
975         size_t write_size, loff_t *poffset)
976 {
977         int rc = 0;
978         unsigned int bytes_written = 0;
979         unsigned int total_written;
980         struct cifs_sb_info *cifs_sb;
981         struct cifsTconInfo *pTcon;
982         int xid, long_op;
983         struct cifsFileInfo *open_file;
984
985         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
986
987         pTcon = cifs_sb->tcon;
988
989         /* cFYI(1,
990            (" write %d bytes to offset %lld of %s", write_size,
991            *poffset, file->f_path.dentry->d_name.name)); */
992
993         if (file->private_data == NULL)
994                 return -EBADF;
995         open_file = (struct cifsFileInfo *) file->private_data;
996
997         rc = generic_write_checks(file, poffset, &write_size, 0);
998         if (rc)
999                 return rc;
1000
1001         xid = GetXid();
1002
1003         if (*poffset > file->f_path.dentry->d_inode->i_size)
 1004                 long_op = CIFS_VLONG_OP; /* writes past EOF take a long time */
1005         else
1006                 long_op = CIFS_LONG_OP;
1007
1008         for (total_written = 0; write_size > total_written;
1009              total_written += bytes_written) {
1010                 rc = -EAGAIN;
1011                 while (rc == -EAGAIN) {
1012                         if (file->private_data == NULL) {
1013                                 /* file has been closed on us */
1014                                 FreeXid(xid);
1015                         /* if we have gotten here we have written some data
1016                            and blocked, and the file has been freed on us while
1017                            we blocked so return what we managed to write */
1018                                 return total_written;
1019                         }
1020                         if (open_file->closePend) {
1021                                 FreeXid(xid);
1022                                 if (total_written)
1023                                         return total_written;
1024                                 else
1025                                         return -EBADF;
1026                         }
1027                         if (open_file->invalidHandle) {
1028                                 /* we could deadlock if we called
1029                                    filemap_fdatawait from here so tell
1030                                    reopen_file not to flush data to server
1031                                    now */
1032                                 rc = cifs_reopen_file(file, false);
1033                                 if (rc != 0)
1034                                         break;
1035                         }
1036
1037                         rc = CIFSSMBWrite(xid, pTcon,
1038                                 open_file->netfid,
1039                                 min_t(const int, cifs_sb->wsize,
1040                                       write_size - total_written),
1041                                 *poffset, &bytes_written,
1042                                 NULL, write_data + total_written, long_op);
1043                 }
1044                 if (rc || (bytes_written == 0)) {
1045                         if (total_written)
1046                                 break;
1047                         else {
1048                                 FreeXid(xid);
1049                                 return rc;
1050                         }
1051                 } else
1052                         *poffset += bytes_written;
1053                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1054                                     15 seconds is plenty */
1055         }
1056
1057         cifs_stats_bytes_written(pTcon, total_written);
1058
1059         /* since the write may have blocked check these pointers again */
1060         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1061                 struct inode *inode = file->f_path.dentry->d_inode;
1062 /* Do not update local mtime - server will set its actual value on write
1063  *              inode->i_ctime = inode->i_mtime =
1064  *                      current_fs_time(inode->i_sb);*/
1065                 if (total_written > 0) {
1066                         spin_lock(&inode->i_lock);
1067                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1068                                 i_size_write(file->f_path.dentry->d_inode,
1069                                         *poffset);
1070                         spin_unlock(&inode->i_lock);
1071                 }
1072                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1073         }
1074         FreeXid(xid);
1075         return total_written;
1076 }
1077
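/* Like cifs_user_write but takes a kernel buffer; used by the writepage
   paths.  When the experimental code is enabled, or signing is not in
   use, the data can be sent via CIFSSMBWrite2 with a kvec to avoid an
   extra copy */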
1078 static ssize_t cifs_write(struct file *file, const char *write_data,
1079                           size_t write_size, loff_t *poffset)
1080 {
1081         int rc = 0;
1082         unsigned int bytes_written = 0;
1083         unsigned int total_written;
1084         struct cifs_sb_info *cifs_sb;
1085         struct cifsTconInfo *pTcon;
1086         int xid, long_op;
1087         struct cifsFileInfo *open_file;
1088
1089         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1090
1091         pTcon = cifs_sb->tcon;
1092
1093         cFYI(1, ("write %zd bytes to offset %lld of %s", write_size,
1094            *poffset, file->f_path.dentry->d_name.name));
1095
1096         if (file->private_data == NULL)
1097                 return -EBADF;
1098         open_file = (struct cifsFileInfo *)file->private_data;
1099
1100         xid = GetXid();
1101
1102         if (*poffset > file->f_path.dentry->d_inode->i_size)
1103                 long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */
1104         else
1105                 long_op = CIFS_LONG_OP;
1106
1107         for (total_written = 0; write_size > total_written;
1108              total_written += bytes_written) {
1109                 rc = -EAGAIN;
1110                 while (rc == -EAGAIN) {
1111                         if (file->private_data == NULL) {
1112                                 /* file has been closed on us */
1113                                 FreeXid(xid);
1114                         /* if we have gotten here we have written some data
1115                            and blocked, and the file has been freed on us
1116                            while we blocked so return what we managed to
1117                            write */
1118                                 return total_written;
1119                         }
1120                         if (open_file->closePend) {
1121                                 FreeXid(xid);
1122                                 if (total_written)
1123                                         return total_written;
1124                                 else
1125                                         return -EBADF;
1126                         }
1127                         if (open_file->invalidHandle) {
1128                                 /* we could deadlock if we called
1129                                    filemap_fdatawait from here so tell
1130                                    reopen_file not to flush data to
1131                                    server now */
1132                                 rc = cifs_reopen_file(file, false);
1133                                 if (rc != 0)
1134                                         break;
1135                         }
1136                         if (experimEnabled || (pTcon->ses->server &&
1137                                 ((pTcon->ses->server->secMode &
1138                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1139                                 == 0))) {
1140                                 struct kvec iov[2];
1141                                 unsigned int len;
1142
1143                                 len = min((size_t)cifs_sb->wsize,
1144                                           write_size - total_written);
1145                                 /* iov[0] is reserved for smb header */
1146                                 iov[1].iov_base = (char *)write_data +
1147                                                   total_written;
1148                                 iov[1].iov_len = len;
1149                                 rc = CIFSSMBWrite2(xid, pTcon,
1150                                                 open_file->netfid, len,
1151                                                 *poffset, &bytes_written,
1152                                                 iov, 1, long_op);
1153                         } else
1154                                 rc = CIFSSMBWrite(xid, pTcon,
1155                                          open_file->netfid,
1156                                          min_t(const int, cifs_sb->wsize,
1157                                                write_size - total_written),
1158                                          *poffset, &bytes_written,
1159                                          write_data + total_written,
1160                                          NULL, long_op);
1161                 }
1162                 if (rc || (bytes_written == 0)) {
1163                         if (total_written)
1164                                 break;
1165                         else {
1166                                 FreeXid(xid);
1167                                 return rc;
1168                         }
1169                 } else
1170                         *poffset += bytes_written;
1171                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1172                                     15 seconds is plenty */
1173         }
1174
1175         cifs_stats_bytes_written(pTcon, total_written);
1176
1177         /* since the write may have blocked check these pointers again */
1178         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1179 /*BB We could make this contingent on superblock ATIME flag too */
1180 /*              file->f_path.dentry->d_inode->i_ctime =
1181                 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1182                 if (total_written > 0) {
1183                         spin_lock(&file->f_path.dentry->d_inode->i_lock);
1184                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1185                                 i_size_write(file->f_path.dentry->d_inode,
1186                                              *poffset);
1187                         spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1188                 }
1189                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1190         }
1191         FreeXid(xid);
1192         return total_written;
1193 }
1194
1195 #ifdef CONFIG_CIFS_EXPERIMENTAL
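/* Find an open file handle on this inode that can be used for reading,
   skipping handles with a close pending, and bump its write pending count
   so the handle is not freed while the caller is using it */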
1196 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1197 {
1198         struct cifsFileInfo *open_file = NULL;
1199
1200         read_lock(&GlobalSMBSeslock);
1201         /* we could simply get the first_list_entry since write-only entries
1202            are always at the end of the list but since the first entry might
1203            have a close pending, we go through the whole list */
1204         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1205                 if (open_file->closePend)
1206                         continue;
1207                 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1208                     (open_file->pfile->f_flags & O_RDONLY))) {
1209                         if (!open_file->invalidHandle) {
1210                                 /* found a good file */
1211                                 /* lock it so it will not be closed on us */
1212                                 atomic_inc(&open_file->wrtPending);
1213                                 read_unlock(&GlobalSMBSeslock);
1214                                 return open_file;
1215                         } /* else might as well continue, and look for
1216                              another, or simply have the caller reopen it
1217                              again rather than trying to fix this handle */
1218                 } else /* write only file */
1219                         break; /* write only files are last so must be done */
1220         }
1221         read_unlock(&GlobalSMBSeslock);
1222         return NULL;
1223 }
1224 #endif
1225
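/*
 * Find an open file handle on this inode that was opened for writing
 * (O_RDWR or O_WRONLY), preferring one opened by the current task.
 * Bumps wrtPending on the handle returned so the caller's write is
 * accounted for (the caller must decrement it when done); returns
 * NULL if no usable handle is found.
 */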
1226 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1227 {
1228         struct cifsFileInfo *open_file;
1229         bool any_available = false;
1230         int rc;
1231
1232         /* Having a null inode here (because mapping->host was set to zero by
1233         the VFS or MM) should not happen, but we had reports of an oops (due to
1234         it being zero) during stress test cases, so we need to check for it */
1235
1236         if (cifs_inode == NULL) {
1237                 cERROR(1, ("Null inode passed to find_writable_file"));
1238                 dump_stack();
1239                 return NULL;
1240         }
1241
1242         read_lock(&GlobalSMBSeslock);
1243 refind_writable:
1244         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1245                 if (open_file->closePend ||
1246                     (!any_available && open_file->pid != current->tgid))
1247                         continue;
1248
1249                 if (open_file->pfile &&
1250                     ((open_file->pfile->f_flags & O_RDWR) ||
1251                      (open_file->pfile->f_flags & O_WRONLY))) {
1252                         atomic_inc(&open_file->wrtPending);
1253
1254                         if (!open_file->invalidHandle) {
1255                                 /* found a good writable file */
1256                                 read_unlock(&GlobalSMBSeslock);
1257                                 return open_file;
1258                         }
1259
1260                         read_unlock(&GlobalSMBSeslock);
1261                         /* Had to unlock since following call can block */
1262                         rc = cifs_reopen_file(open_file->pfile, false);
1263                         if (!rc) {
1264                                 if (!open_file->closePend)
1265                                         return open_file;
1266                                 else { /* start over in case this was deleted */
1267                                        /* since the list could be modified */
1268                                         read_lock(&GlobalSMBSeslock);
1269                                         atomic_dec(&open_file->wrtPending);
1270                                         goto refind_writable;
1271                                 }
1272                         }
1273
1274                         /* if the reopen fails, try another handle if
1275                         possible. We cannot do this if a close is pending,
1276                         since the list could have been modified - in that
1277                         case we have to start again at the beginning of
1278                         the list. Note that it would be bad to hold up
1279                         writepages here (rather than in the caller) with
1280                         continuous retries */
1281                         cFYI(1, ("wp failed on reopen file"));
1282                         read_lock(&GlobalSMBSeslock);
1283                         /* can not use this handle, no write
1284                            pending on this one after all */
1285                         atomic_dec(&open_file->wrtPending);
1286
1287                         if (open_file->closePend) /* list could have changed */
1288                                 goto refind_writable;
1289                         /* else we simply continue to the next entry. Thus
1290                            we do not loop on reopen errors.  If we cannot
1291                            reopen the file, for example because we
1292                            reconnected to a server with another client
1293                            racing to delete or lock the file, we would make
1294                            no progress if we restarted at the beginning
1295                            of the loop here. */
1296                 }
1297         }
1298         /* couldn't find a usable FH with the same pid, try any available */
1299         if (!any_available) {
1300                 any_available = true;
1301                 goto refind_writable;
1302         }
1303         read_unlock(&GlobalSMBSeslock);
1304         return NULL;
1305 }
1306
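/*
 * Write the byte range [from, to) of a page cache page back to the
 * server synchronously, using any writable handle open on the inode.
 * Returns 0 on success and a negative error code otherwise.
 */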
1307 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1308 {
1309         struct address_space *mapping = page->mapping;
1310         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1311         char *write_data;
1312         int rc = -EFAULT;
1313         int bytes_written = 0;
1314         struct cifs_sb_info *cifs_sb;
1315         struct cifsTconInfo *pTcon;
1316         struct inode *inode;
1317         struct cifsFileInfo *open_file;
1318
1319         if (!mapping || !mapping->host)
1320                 return -EFAULT;
1321
1322         inode = page->mapping->host;
1323         cifs_sb = CIFS_SB(inode->i_sb);
1324         pTcon = cifs_sb->tcon;
1325
1326         offset += (loff_t)from;
1327         write_data = kmap(page);
1328         write_data += from;
1329
1330         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1331                 kunmap(page);
1332                 return -EIO;
1333         }
1334
1335         /* racing with truncate? */
1336         if (offset > mapping->host->i_size) {
1337                 kunmap(page);
1338                 return 0; /* don't care */
1339         }
1340
1341         /* check to make sure that we are not extending the file */
1342         if (mapping->host->i_size - offset < (loff_t)to)
1343                 to = (unsigned)(mapping->host->i_size - offset);
1344
1345         open_file = find_writable_file(CIFS_I(mapping->host));
1346         if (open_file) {
1347                 bytes_written = cifs_write(open_file->pfile, write_data,
1348                                            to-from, &offset);
1349                 atomic_dec(&open_file->wrtPending);
1350                 /* Does mm or vfs already set times? */
1351                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1352                 if ((bytes_written > 0) && (offset))
1353                         rc = 0;
1354                 else if (bytes_written < 0)
1355                         rc = bytes_written;
1356         } else {
1357                 cFYI(1, ("No writeable filehandles for inode"));
1358                 rc = -EIO;
1359         }
1360
1361         kunmap(page);
1362         return rc;
1363 }
1364
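/*
 * Write back dirty pages for this mapping, batching runs of contiguous
 * dirty pages (up to wsize bytes) into a single CIFSSMBWrite2 call.
 * Falls back to generic_writepages (one page at a time) when wsize is
 * smaller than a page, or when signing is in use and the experimental
 * support is not enabled.
 */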
1365 static int cifs_writepages(struct address_space *mapping,
1366                            struct writeback_control *wbc)
1367 {
1368         struct backing_dev_info *bdi = mapping->backing_dev_info;
1369         unsigned int bytes_to_write;
1370         unsigned int bytes_written;
1371         struct cifs_sb_info *cifs_sb;
1372         int done = 0;
1373         pgoff_t end;
1374         pgoff_t index;
1375         int range_whole = 0;
1376         struct kvec *iov;
1377         int len;
1378         int n_iov = 0;
1379         pgoff_t next;
1380         int nr_pages;
1381         __u64 offset = 0;
1382         struct cifsFileInfo *open_file;
1383         struct page *page;
1384         struct pagevec pvec;
1385         int rc = 0;
1386         int scanned = 0;
1387         int xid;
1388
1389         cifs_sb = CIFS_SB(mapping->host->i_sb);
1390
1391         /*
1392          * If wsize is smaller than the page cache size, default to writing
1393          * one page at a time via cifs_writepage
1394          */
1395         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1396                 return generic_writepages(mapping, wbc);
1397
1398         if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1399                 if (cifs_sb->tcon->ses->server->secMode &
1400                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1401                         if (!experimEnabled)
1402                                 return generic_writepages(mapping, wbc);
1403
1404         iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1405         if (iov == NULL)
1406                 return generic_writepages(mapping, wbc);
1407
1408
1409         /*
1410          * BB: Is this meaningful for a non-block-device file system?
1411          * If it is, we should test it again after we do I/O
1412          */
1413         if (wbc->nonblocking && bdi_write_congested(bdi)) {
1414                 wbc->encountered_congestion = 1;
1415                 kfree(iov);
1416                 return 0;
1417         }
1418
1419         xid = GetXid();
1420
1421         pagevec_init(&pvec, 0);
1422         if (wbc->range_cyclic) {
1423                 index = mapping->writeback_index; /* Start from prev offset */
1424                 end = -1;
1425         } else {
1426                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1427                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1428                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1429                         range_whole = 1;
1430                 scanned = 1;
1431         }
1432 retry:
1433         while (!done && (index <= end) &&
1434                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1435                         PAGECACHE_TAG_DIRTY,
1436                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1437                 int first;
1438                 unsigned int i;
1439
1440                 first = -1;
1441                 next = 0;
1442                 n_iov = 0;
1443                 bytes_to_write = 0;
1444
1445                 for (i = 0; i < nr_pages; i++) {
1446                         page = pvec.pages[i];
1447                         /*
1448                          * At this point we hold neither mapping->tree_lock nor
1449                          * lock on the page itself: the page may be truncated or
1450                          * invalidated (changing page->mapping to NULL), or even
1451                          * swizzled back from swapper_space to tmpfs file
1452                          * mapping
1453                          */
1454
1455                         if (first < 0)
1456                                 lock_page(page);
1457                         else if (!trylock_page(page))
1458                                 break;
1459
1460                         if (unlikely(page->mapping != mapping)) {
1461                                 unlock_page(page);
1462                                 break;
1463                         }
1464
1465                         if (!wbc->range_cyclic && page->index > end) {
1466                                 done = 1;
1467                                 unlock_page(page);
1468                                 break;
1469                         }
1470
1471                         if (next && (page->index != next)) {
1472                                 /* Not next consecutive page */
1473                                 unlock_page(page);
1474                                 break;
1475                         }
1476
1477                         if (wbc->sync_mode != WB_SYNC_NONE)
1478                                 wait_on_page_writeback(page);
1479
1480                         if (PageWriteback(page) ||
1481                                         !clear_page_dirty_for_io(page)) {
1482                                 unlock_page(page);
1483                                 break;
1484                         }
1485
1486                         /*
1487                          * This actually clears the dirty bit in the radix tree.
1488                          * See cifs_writepage() for more commentary.
1489                          */
1490                         set_page_writeback(page);
1491
1492                         if (page_offset(page) >= mapping->host->i_size) {
1493                                 done = 1;
1494                                 unlock_page(page);
1495                                 end_page_writeback(page);
1496                                 break;
1497                         }
1498
1499                         /*
1500                          * BB can we get rid of this?  pages are held by pvec
1501                          */
1502                         page_cache_get(page);
1503
1504                         len = min(mapping->host->i_size - page_offset(page),
1505                                   (loff_t)PAGE_CACHE_SIZE);
1506
1507                         /* reserve iov[0] for the smb header */
1508                         n_iov++;
1509                         iov[n_iov].iov_base = kmap(page);
1510                         iov[n_iov].iov_len = len;
1511                         bytes_to_write += len;
1512
1513                         if (first < 0) {
1514                                 first = i;
1515                                 offset = page_offset(page);
1516                         }
1517                         next = page->index + 1;
1518                         if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1519                                 break;
1520                 }
1521                 if (n_iov) {
1522                         /* Search for a writable handle every time we call
1523                          * CIFSSMBWrite2.  We can't rely on the last handle
1524                          * we used to still be valid
1525                          */
1526                         open_file = find_writable_file(CIFS_I(mapping->host));
1527                         if (!open_file) {
1528                                 cERROR(1, ("No writable handles for inode"));
1529                                 rc = -EBADF;
1530                         } else {
1531                                 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1532                                                    open_file->netfid,
1533                                                    bytes_to_write, offset,
1534                                                    &bytes_written, iov, n_iov,
1535                                                    CIFS_LONG_OP);
1536                                 atomic_dec(&open_file->wrtPending);
1537                                 if (rc || bytes_written < bytes_to_write) {
1538                                         cERROR(1, ("Write2 ret %d, wrote %d",
1539                                                   rc, bytes_written));
1540                                         /* BB what if continued retry is
1541                                            requested via mount flags? */
1542                                         if (rc == -ENOSPC)
1543                                                 set_bit(AS_ENOSPC, &mapping->flags);
1544                                         else
1545                                                 set_bit(AS_EIO, &mapping->flags);
1546                                 } else {
1547                                         cifs_stats_bytes_written(cifs_sb->tcon,
1548                                                                  bytes_written);
1549                                 }
1550                         }
1551                         for (i = 0; i < n_iov; i++) {
1552                                 page = pvec.pages[first + i];
1553                                 /* Should we also set the page error when
1554                                 rc is success but too little data was written? */
1555                                 /* BB investigate retry logic on temporary
1556                                 server crash cases and how recovery works
1557                                 when page marked as error */
1558                                 if (rc)
1559                                         SetPageError(page);
1560                                 kunmap(page);
1561                                 unlock_page(page);
1562                                 end_page_writeback(page);
1563                                 page_cache_release(page);
1564                         }
1565                         if ((wbc->nr_to_write -= n_iov) <= 0)
1566                                 done = 1;
1567                         index = next;
1568                 } else
1569                         /* Need to re-find the pages we skipped */
1570                         index = pvec.pages[0]->index + 1;
1571
1572                 pagevec_release(&pvec);
1573         }
1574         if (!scanned && !done) {
1575                 /*
1576                  * We hit the last page and there is more work to be done: wrap
1577                  * back to the start of the file
1578                  */
1579                 scanned = 1;
1580                 index = 0;
1581                 goto retry;
1582         }
1583         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1584                 mapping->writeback_index = index;
1585
1586         FreeXid(xid);
1587         kfree(iov);
1588         return rc;
1589 }
1590
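/* Write back a single dirty page synchronously via cifs_partialpagewrite */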
1591 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1592 {
1593         int rc = -EFAULT;
1594         int xid;
1595
1596         xid = GetXid();
1597 /* BB add check for wbc flags */
1598         page_cache_get(page);
1599         if (!PageUptodate(page))
1600                 cFYI(1, ("ppw - page not up to date"));
1601
1602         /*
1603          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1604          *
1605          * A writepage() implementation always needs to do either this,
1606          * or re-dirty the page with "redirty_page_for_writepage()" in
1607          * the case of a failure.
1608          *
1609          * Just unlocking the page will cause the radix tree tag-bits
1610          * to fail to update with the state of the page correctly.
1611          */
1612         set_page_writeback(page);
1613         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1614         SetPageUptodate(page); /* BB add check for error and ClearPageUptodate? */
1615         unlock_page(page);
1616         end_page_writeback(page);
1617         page_cache_release(page);
1618         FreeXid(xid);
1619         return rc;
1620 }
1621
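/*
 * Complete a buffered write: if the page is up to date it is simply
 * marked dirty for later writeback, otherwise the copied bytes are
 * written out synchronously via cifs_write.  The inode size is grown
 * if the write extended the file.
 */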
1622 static int cifs_write_end(struct file *file, struct address_space *mapping,
1623                         loff_t pos, unsigned len, unsigned copied,
1624                         struct page *page, void *fsdata)
1625 {
1626         int rc;
1627         struct inode *inode = mapping->host;
1628
1629         cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1630                  page, pos, copied));
1631
1632         if (PageChecked(page)) {
1633                 if (copied == len)
1634                         SetPageUptodate(page);
1635                 ClearPageChecked(page);
1636         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1637                 SetPageUptodate(page);
1638
1639         if (!PageUptodate(page)) {
1640                 char *page_data;
1641                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1642                 int xid;
1643
1644                 xid = GetXid();
1645                 /* this is probably better than calling cifs_partialpagewrite
1646                    directly, since in this function the file handle is
1647                    known and we might as well leverage it */
1648                 /* BB check if anything else missing out of ppw
1649                    such as updating last write time */
1650                 page_data = kmap(page);
1651                 rc = cifs_write(file, page_data + offset, copied, &pos);
1652                 /* if (rc < 0) should we set writebehind rc? */
1653                 kunmap(page);
1654
1655                 FreeXid(xid);
1656         } else {
1657                 rc = copied;
1658                 pos += copied;
1659                 set_page_dirty(page);
1660         }
1661
1662         if (rc > 0) {
1663                 spin_lock(&inode->i_lock);
1664                 if (pos > inode->i_size)
1665                         i_size_write(inode, pos);
1666                 spin_unlock(&inode->i_lock);
1667         }
1668
1669         unlock_page(page);
1670         page_cache_release(page);
1671
1672         return rc;
1673 }
1674
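/*
 * fsync: write out and wait on all dirty pages for the inode, report
 * any stored writebehind error, and (unless the mount disables it)
 * send an SMB Flush so the server commits the data.
 */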
1675 int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1676 {
1677         int xid;
1678         int rc = 0;
1679         struct cifsTconInfo *tcon;
1680         struct cifsFileInfo *smbfile =
1681                 (struct cifsFileInfo *)file->private_data;
1682         struct inode *inode = file->f_path.dentry->d_inode;
1683
1684         xid = GetXid();
1685
1686         cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1687                 dentry->d_name.name, datasync));
1688
1689         rc = filemap_write_and_wait(inode->i_mapping);
1690         if (rc == 0) {
1691                 rc = CIFS_I(inode)->write_behind_rc;
1692                 CIFS_I(inode)->write_behind_rc = 0;
1693                 tcon = CIFS_SB(inode->i_sb)->tcon;
1694                 if (!rc && tcon && smbfile &&
1695                    !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1696                         rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1697         }
1698
1699         FreeXid(xid);
1700         return rc;
1701 }
1702
1703 /* static void cifs_sync_page(struct page *page)
1704 {
1705         struct address_space *mapping;
1706         struct inode *inode;
1707         unsigned long index = page->index;
1708         unsigned int rpages = 0;
1709         int rc = 0;
1710
1711         cFYI(1, ("sync page %p",page));
1712         mapping = page->mapping;
1713         if (!mapping)
1714                 return 0;
1715         inode = mapping->host;
1716         if (!inode)
1717                 return; */
1718
1719 /*      fill in rpages then
1720         result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1721
1722 /*      cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1723
1724 #if 0
1725         if (rc < 0)
1726                 return rc;
1727         return 0;
1728 #endif
1729 } */
1730
1731 /*
1732  * As file closes, flush all cached write data for this inode checking
1733  * for write behind errors.
1734  */
1735 int cifs_flush(struct file *file, fl_owner_t id)
1736 {
1737         struct inode *inode = file->f_path.dentry->d_inode;
1738         int rc = 0;
1739
1740         /* Rather than do the steps manually:
1741            lock the inode for writing
1742            loop through pages looking for write behind data (dirty pages)
1743            coalesce into contiguous 16K (or smaller) chunks to write to server
1744            send to server (prefer in parallel)
1745            deal with writebehind errors
1746            unlock inode for writing
1747            filemap_fdatawrite appears easier for the time being */
1748
1749         rc = filemap_fdatawrite(inode->i_mapping);
1750         /* reset wb rc if we were able to write out dirty pages */
1751         if (!rc) {
1752                 rc = CIFS_I(inode)->write_behind_rc;
1753                 CIFS_I(inode)->write_behind_rc = 0;
1754         }
1755
1756         cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
1757
1758         return rc;
1759 }
1760
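/*
 * Read from the server straight into a user buffer, looping in chunks
 * of at most rsize bytes and copying each SMB response to user space.
 */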
1761 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1762         size_t read_size, loff_t *poffset)
1763 {
1764         int rc = -EACCES;
1765         unsigned int bytes_read = 0;
1766         unsigned int total_read = 0;
1767         unsigned int current_read_size;
1768         struct cifs_sb_info *cifs_sb;
1769         struct cifsTconInfo *pTcon;
1770         int xid;
1771         struct cifsFileInfo *open_file;
1772         char *smb_read_data;
1773         char __user *current_offset;
1774         struct smb_com_read_rsp *pSMBr;
1775
1776         xid = GetXid();
1777         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1778         pTcon = cifs_sb->tcon;
1779
1780         if (file->private_data == NULL) {
1781                 FreeXid(xid);
1782                 return -EBADF;
1783         }
1784         open_file = (struct cifsFileInfo *)file->private_data;
1785
1786         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1787                 cFYI(1, ("attempting read on write only file instance"));
1788
1789         for (total_read = 0, current_offset = read_data;
1790              read_size > total_read;
1791              total_read += bytes_read, current_offset += bytes_read) {
1792                 current_read_size = min_t(const int, read_size - total_read,
1793                                           cifs_sb->rsize);
1794                 rc = -EAGAIN;
1795                 smb_read_data = NULL;
1796                 while (rc == -EAGAIN) {
1797                         int buf_type = CIFS_NO_BUFFER;
1798                         if ((open_file->invalidHandle) &&
1799                             (!open_file->closePend)) {
1800                                 rc = cifs_reopen_file(file, true);
1801                                 if (rc != 0)
1802                                         break;
1803                         }
1804                         rc = CIFSSMBRead(xid, pTcon,
1805                                          open_file->netfid,
1806                                          current_read_size, *poffset,
1807                                          &bytes_read, &smb_read_data,
1808                                          &buf_type);
1809                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1810                         if (smb_read_data) {
1811                                 if (copy_to_user(current_offset,
1812                                                 smb_read_data +
1813                                                 4 /* RFC1001 length field */ +
1814                                                 le16_to_cpu(pSMBr->DataOffset),
1815                                                 bytes_read))
1816                                         rc = -EFAULT;
1817
1818                                 if (buf_type == CIFS_SMALL_BUFFER)
1819                                         cifs_small_buf_release(smb_read_data);
1820                                 else if (buf_type == CIFS_LARGE_BUFFER)
1821                                         cifs_buf_release(smb_read_data);
1822                                 smb_read_data = NULL;
1823                         }
1824                 }
1825                 if (rc || (bytes_read == 0)) {
1826                         if (total_read) {
1827                                 break;
1828                         } else {
1829                                 FreeXid(xid);
1830                                 return rc;
1831                         }
1832                 } else {
1833                         cifs_stats_bytes_read(pTcon, bytes_read);
1834                         *poffset += bytes_read;
1835                 }
1836         }
1837         FreeXid(xid);
1838         return total_read;
1839 }
1840
1841
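/*
 * Read from the server into a kernel buffer (used by the readpage
 * paths), looping in chunks of at most rsize bytes and retrying after
 * reopening an invalidated handle on -EAGAIN.
 */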
1842 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1843         loff_t *poffset)
1844 {
1845         int rc = -EACCES;
1846         unsigned int bytes_read = 0;
1847         unsigned int total_read;
1848         unsigned int current_read_size;
1849         struct cifs_sb_info *cifs_sb;
1850         struct cifsTconInfo *pTcon;
1851         int xid;
1852         char *current_offset;
1853         struct cifsFileInfo *open_file;
1854         int buf_type = CIFS_NO_BUFFER;
1855
1856         xid = GetXid();
1857         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1858         pTcon = cifs_sb->tcon;
1859
1860         if (file->private_data == NULL) {
1861                 FreeXid(xid);
1862                 return -EBADF;
1863         }
1864         open_file = (struct cifsFileInfo *)file->private_data;
1865
1866         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1867                 cFYI(1, ("attempting read on write only file instance"));
1868
1869         for (total_read = 0, current_offset = read_data;
1870              read_size > total_read;
1871              total_read += bytes_read, current_offset += bytes_read) {
1872                 current_read_size = min_t(const int, read_size - total_read,
1873                                           cifs_sb->rsize);
1874                 /* For Windows ME and 9x we do not want to request more
1875                 than the server negotiated since it will then refuse the read */
1876                 if ((pTcon->ses) &&
1877                         !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1878                         current_read_size = min_t(const int, current_read_size,
1879                                         pTcon->ses->server->maxBuf - 128);
1880                 }
1881                 rc = -EAGAIN;
1882                 while (rc == -EAGAIN) {
1883                         if ((open_file->invalidHandle) &&
1884                             (!open_file->closePend)) {
1885                                 rc = cifs_reopen_file(file, true);
1886                                 if (rc != 0)
1887                                         break;
1888                         }
1889                         rc = CIFSSMBRead(xid, pTcon,
1890                                          open_file->netfid,
1891                                          current_read_size, *poffset,
1892                                          &bytes_read, &current_offset,
1893                                          &buf_type);
1894                 }
1895                 if (rc || (bytes_read == 0)) {
1896                         if (total_read) {
1897                                 break;
1898                         } else {
1899                                 FreeXid(xid);
1900                                 return rc;
1901                         }
1902                 } else {
1903                         cifs_stats_bytes_read(pTcon, bytes_read);
1904                         *poffset += bytes_read;
1905                 }
1906         }
1907         FreeXid(xid);
1908         return total_read;
1909 }
1910
1911 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1912 {
1913         struct dentry *dentry = file->f_path.dentry;
1914         int rc, xid;
1915
1916         xid = GetXid();
1917         rc = cifs_revalidate(dentry);
1918         if (rc) {
1919                 cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
1920                 FreeXid(xid);
1921                 return rc;
1922         }
1923         rc = generic_file_mmap(file, vma);
1924         FreeXid(xid);
1925         return rc;
1926 }
1927
1928
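/*
 * Copy bytes_read bytes of response data into the pages on the
 * readahead list, adding each filled page to the page cache and the
 * LRU pagevec.  The tail of a partial final page is zeroed.
 */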
1929 static void cifs_copy_cache_pages(struct address_space *mapping,
1930         struct list_head *pages, int bytes_read, char *data,
1931         struct pagevec *plru_pvec)
1932 {
1933         struct page *page;
1934         char *target;
1935
1936         while (bytes_read > 0) {
1937                 if (list_empty(pages))
1938                         break;
1939
1940                 page = list_entry(pages->prev, struct page, lru);
1941                 list_del(&page->lru);
1942
1943                 if (add_to_page_cache(page, mapping, page->index,
1944                                       GFP_KERNEL)) {
1945                         page_cache_release(page);
1946                         cFYI(1, ("Add page cache failed"));
1947                         data += PAGE_CACHE_SIZE;
1948                         bytes_read -= PAGE_CACHE_SIZE;
1949                         continue;
1950                 }
1951
1952                 target = kmap_atomic(page, KM_USER0);
1953
1954                 if (PAGE_CACHE_SIZE > bytes_read) {
1955                         memcpy(target, data, bytes_read);
1956                         /* zero the tail end of this partial page */
1957                         memset(target + bytes_read, 0,
1958                                PAGE_CACHE_SIZE - bytes_read);
1959                         bytes_read = 0;
1960                 } else {
1961                         memcpy(target, data, PAGE_CACHE_SIZE);
1962                         bytes_read -= PAGE_CACHE_SIZE;
1963                 }
1964                 kunmap_atomic(target, KM_USER0);
1965
1966                 flush_dcache_page(page);
1967                 SetPageUptodate(page);
1968                 unlock_page(page);
1969                 if (!pagevec_add(plru_pvec, page))
1970                         __pagevec_lru_add_file(plru_pvec);
1971                 data += PAGE_CACHE_SIZE;
1972         }
1973         return;
1974 }
1975
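/*
 * Readahead: take runs of contiguous pages off the readahead list and
 * fill each run with a single large read (bounded by rsize), then
 * insert the pages into the page cache via cifs_copy_cache_pages.
 */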
1976 static int cifs_readpages(struct file *file, struct address_space *mapping,
1977         struct list_head *page_list, unsigned num_pages)
1978 {
1979         int rc = -EACCES;
1980         int xid;
1981         loff_t offset;
1982         struct page *page;
1983         struct cifs_sb_info *cifs_sb;
1984         struct cifsTconInfo *pTcon;
1985         unsigned int bytes_read = 0;
1986         unsigned int read_size, i;
1987         char *smb_read_data = NULL;
1988         struct smb_com_read_rsp *pSMBr;
1989         struct pagevec lru_pvec;
1990         struct cifsFileInfo *open_file;
1991         int buf_type = CIFS_NO_BUFFER;
1992
1993         xid = GetXid();
1994         if (file->private_data == NULL) {
1995                 FreeXid(xid);
1996                 return -EBADF;
1997         }
1998         open_file = (struct cifsFileInfo *)file->private_data;
1999         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2000         pTcon = cifs_sb->tcon;
2001
2002         pagevec_init(&lru_pvec, 0);
2003         cFYI(DBG2, ("rpages: num pages %d", num_pages));
2004         for (i = 0; i < num_pages; ) {
2005                 unsigned contig_pages;
2006                 struct page *tmp_page;
2007                 unsigned long expected_index;
2008
2009                 if (list_empty(page_list))
2010                         break;
2011
2012                 page = list_entry(page_list->prev, struct page, lru);
2013                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2014
2015                 /* count adjacent pages that we will read into */
2016                 contig_pages = 0;
2017                 expected_index =
2018                         list_entry(page_list->prev, struct page, lru)->index;
2019                 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2020                         if (tmp_page->index == expected_index) {
2021                                 contig_pages++;
2022                                 expected_index++;
2023                         } else
2024                                 break;
2025                 }
2026                 if (contig_pages + i >  num_pages)
2027                         contig_pages = num_pages - i;
2028
2029                 /* for reads over a certain size could initiate async
2030                    read ahead */
2031
2032                 read_size = contig_pages * PAGE_CACHE_SIZE;
2033                 /* Read size needs to be a multiple of one page */
2034                 read_size = min_t(const unsigned int, read_size,
2035                                   cifs_sb->rsize & PAGE_CACHE_MASK);
2036                 cFYI(DBG2, ("rpages: read size 0x%x  contiguous pages %d",
2037                                 read_size, contig_pages));
2038                 rc = -EAGAIN;
2039                 while (rc == -EAGAIN) {
2040                         if ((open_file->invalidHandle) &&
2041                             (!open_file->closePend)) {
2042                                 rc = cifs_reopen_file(file, true);
2043                                 if (rc != 0)
2044                                         break;
2045                         }
2046
2047                         rc = CIFSSMBRead(xid, pTcon,
2048                                          open_file->netfid,
2049                                          read_size, offset,
2050                                          &bytes_read, &smb_read_data,
2051                                          &buf_type);
2052                         /* BB more RC checks ? */
2053                         if (rc == -EAGAIN) {
2054                                 if (smb_read_data) {
2055                                         if (buf_type == CIFS_SMALL_BUFFER)
2056                                                 cifs_small_buf_release(smb_read_data);
2057                                         else if (buf_type == CIFS_LARGE_BUFFER)
2058                                                 cifs_buf_release(smb_read_data);
2059                                         smb_read_data = NULL;
2060                                 }
2061                         }
2062                 }
2063                 if ((rc < 0) || (smb_read_data == NULL)) {
2064                         cFYI(1, ("Read error in readpages: %d", rc));
2065                         break;
2066                 } else if (bytes_read > 0) {
2067                         task_io_account_read(bytes_read);
2068                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2069                         cifs_copy_cache_pages(mapping, page_list, bytes_read,
2070                                 smb_read_data + 4 /* RFC1001 hdr */ +
2071                                 le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
2072
2073                         i +=  bytes_read >> PAGE_CACHE_SHIFT;
2074                         cifs_stats_bytes_read(pTcon, bytes_read);
2075                         if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2076                                 i++; /* account for partial page */
2077
2078                                 /* server copy of file can have smaller size
2079                                    than client */
2080                                 /* BB do we need to verify this common case ?
2081                                    this case is ok - if we are at server EOF
2082                                    we will hit it on next read */
2083
2084                                 /* break; */
2085                         }
2086                 } else {
2087                         cFYI(1, ("No bytes read (%d) at offset %lld . "
2088                                  "Cleaning remaining pages from readahead list",
2089                                  bytes_read, offset));
2090                         /* BB turn off caching and do new lookup on
2091                            file size at server? */
2092                         break;
2093                 }
2094                 if (smb_read_data) {
2095                         if (buf_type == CIFS_SMALL_BUFFER)
2096                                 cifs_small_buf_release(smb_read_data);
2097                         else if (buf_type == CIFS_LARGE_BUFFER)
2098                                 cifs_buf_release(smb_read_data);
2099                         smb_read_data = NULL;
2100                 }
2101                 bytes_read = 0;
2102         }
2103
2104         pagevec_lru_add_file(&lru_pvec);
2105
2106 /* need to free smb_read_data buf before exit */
2107         if (smb_read_data) {
2108                 if (buf_type == CIFS_SMALL_BUFFER)
2109                         cifs_small_buf_release(smb_read_data);
2110                 else if (buf_type == CIFS_LARGE_BUFFER)
2111                         cifs_buf_release(smb_read_data);
2112                 smb_read_data = NULL;
2113         }
2114
2115         FreeXid(xid);
2116         return rc;
2117 }
2118
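/*
 * Fill a single page by reading from the server; any tail beyond the
 * bytes actually read is zeroed and the page is marked up to date on
 * success.
 */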
2119 static int cifs_readpage_worker(struct file *file, struct page *page,
2120         loff_t *poffset)
2121 {
2122         char *read_data;
2123         int rc;
2124
2125         page_cache_get(page);
2126         read_data = kmap(page);
2127         /* for reads over a certain size could initiate async read ahead */
2128
2129         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2130
2131         if (rc < 0)
2132                 goto io_error;
2133         else
2134                 cFYI(1, ("Bytes read %d", rc));
2135
2136         file->f_path.dentry->d_inode->i_atime =
2137                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2138
2139         if (PAGE_CACHE_SIZE > rc)
2140                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2141
2142         flush_dcache_page(page);
2143         SetPageUptodate(page);
2144         rc = 0;
2145
2146 io_error:
2147         kunmap(page);
2148         page_cache_release(page);
2149         return rc;
2150 }
2151
2152 static int cifs_readpage(struct file *file, struct page *page)
2153 {
2154         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2155         int rc = -EACCES;
2156         int xid;
2157
2158         xid = GetXid();
2159
2160         if (file->private_data == NULL) {
2161                 FreeXid(xid);
2162                 return -EBADF;
2163         }
2164
2165         cFYI(1, ("readpage %p at offset %d 0x%x\n",
2166                  page, (int)offset, (int)offset));
2167
2168         rc = cifs_readpage_worker(file, page, &offset);
2169
2170         unlock_page(page);
2171
2172         FreeXid(xid);
2173         return rc;
2174 }
2175
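/* Return 1 if any open, non-close-pending handle on the inode is writable */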
2176 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2177 {
2178         struct cifsFileInfo *open_file;
2179
2180         read_lock(&GlobalSMBSeslock);
2181         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2182                 if (open_file->closePend)
2183                         continue;
2184                 if (open_file->pfile &&
2185                     ((open_file->pfile->f_flags & O_RDWR) ||
2186                      (open_file->pfile->f_flags & O_WRONLY))) {
2187                         read_unlock(&GlobalSMBSeslock);
2188                         return 1;
2189                 }
2190         }
2191         read_unlock(&GlobalSMBSeslock);
2192         return 0;
2193 }
2194
2195 /* We do not want to update the file size from the server for inodes
2196    open for write, to avoid races with writepage extending the file.
2197    In the future we could consider allowing refreshing the inode
2198    only on increases in the file size, but this is tricky to do
2199    without racing with writebehind page caching in the current
2200    Linux kernel design */
2201 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2202 {
2203         if (!cifsInode)
2204                 return true;
2205
2206         if (is_inode_writable(cifsInode)) {
2207                 /* This inode is open for write at least once */
2208                 struct cifs_sb_info *cifs_sb;
2209
2210                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2211                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2212                         /* since no page cache to corrupt on directio
2213                         we can change size safely */
2214                         return true;
2215                 }
2216
2217                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2218                         return true;
2219
2220                 return false;
2221         } else
2222                 return true;
2223 }
2224
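/*
 * Prepare for a buffered write: grab and lock the page cache page and,
 * when the write is partial, pre-read or zero the rest of the page so
 * that cifs_write_end can complete the write.
 */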
2225 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2226                         loff_t pos, unsigned len, unsigned flags,
2227                         struct page **pagep, void **fsdata)
2228 {
2229         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2230         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2231         loff_t page_start = pos & PAGE_MASK;
2232         loff_t i_size;
2233         struct page *page;
2234         int rc = 0;
2235
2236         cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2237
2238         page = grab_cache_page_write_begin(mapping, index, flags);
2239         if (!page) {
2240                 rc = -ENOMEM;
2241                 goto out;
2242         }
2243
2244         if (PageUptodate(page))
2245                 goto out;
2246
2247         /*
2248          * If we write a full page it will be up to date, no need to read from
2249          * the server. If the write is short, we'll end up doing a sync write
2250          * instead.
2251          */
2252         if (len == PAGE_CACHE_SIZE)
2253                 goto out;
2254
2255         /*
2256          * optimize away the read when we have an oplock, and we're not
2257          * expecting to use any of the data we'd be reading in. That
2258          * is, when the page lies beyond the EOF, or straddles the EOF
2259          * and the write will cover all of the existing data.
2260          */
2261         if (CIFS_I(mapping->host)->clientCanCacheRead) {
2262                 i_size = i_size_read(mapping->host);
2263                 if (page_start >= i_size ||
2264                     (offset == 0 && (pos + len) >= i_size)) {
2265                         zero_user_segments(page, 0, offset,
2266                                            offset + len,
2267                                            PAGE_CACHE_SIZE);
2268                         /*
2269                          * PageChecked means that the parts of the page
2270                          * to which we're not writing are considered up
2271                          * to date. Once the data is copied to the
2272                          * page, it can be set uptodate.
2273                          */
2274                         SetPageChecked(page);
2275                         goto out;
2276                 }
2277         }
2278
2279         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2280                 /*
2281                  * might as well read a page, it is fast enough. If we get
2282                  * an error, we don't need to return it. cifs_write_end will
2283                  * do a sync write instead since PG_uptodate isn't set.
2284                  */
2285                 cifs_readpage_worker(file, page, &page_start);
2286         } else {
2287                 /* we could try using another file handle if there is one -
2288                    but how would we lock it to prevent close of that handle
2289                    racing with this read? In any case
2290                    this will be written out by write_end so is fine */
2291         }
2292 out:
2293         *pagep = page;
2294         return rc;
2295 }
2296
2297 const struct address_space_operations cifs_addr_ops = {
2298         .readpage = cifs_readpage,
2299         .readpages = cifs_readpages,
2300         .writepage = cifs_writepage,
2301         .writepages = cifs_writepages,
2302         .write_begin = cifs_write_begin,
2303         .write_end = cifs_write_end,
2304         .set_page_dirty = __set_page_dirty_nobuffers,
2305         /* .sync_page = cifs_sync_page, */
2306         /* .direct_IO = */
2307 };
2308
2309 /*
2310  * cifs_readpages requires the server to support a buffer large enough to
2311  * contain the header plus one complete page of data.  Otherwise, we need
2312  * to leave cifs_readpages out of the address space operations.
2313  */
2314 const struct address_space_operations cifs_addr_ops_smallbuf = {
2315         .readpage = cifs_readpage,
2316         .writepage = cifs_writepage,
2317         .writepages = cifs_writepages,
2318         .write_begin = cifs_write_begin,
2319         .write_end = cifs_write_end,
2320         .set_page_dirty = __set_page_dirty_nobuffers,
2321         /* .sync_page = cifs_sync_page, */
2322         /* .direct_IO = */
2323 };