[CIFS] fix posix open regression
[safe/jmp/linux-2.6] / fs / cifs / file.c
index f9bd8b8..302ea15 100644 (file)
@@ -46,13 +46,13 @@ static inline struct cifsFileInfo *cifs_init_private(
        memset(private_data, 0, sizeof(struct cifsFileInfo));
        private_data->netfid = netfid;
        private_data->pid = current->tgid;
-       init_MUTEX(&private_data->fh_sem);
+       mutex_init(&private_data->fh_mutex);
        mutex_init(&private_data->lock_mutex);
        INIT_LIST_HEAD(&private_data->llist);
        private_data->pfile = file; /* needed for writepage */
        private_data->pInode = inode;
-       private_data->invalidHandle = FALSE;
-       private_data->closePend = FALSE;
+       private_data->invalidHandle = false;
+       private_data->closePend = false;
        /* we have to track num writers to the inode, since writepages
        does not tell us which handle the write is for so there can
        be a close (overlapping with write) of the filehandle that
@@ -75,7 +75,39 @@ static inline int cifs_convert_flags(unsigned int flags)
                return (GENERIC_READ | GENERIC_WRITE);
        }
 
-       return 0x20197;
+       return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
+               FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
+               FILE_READ_DATA);
+}
+
+static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
+{
+       fmode_t posix_flags = 0;
+
+       if ((flags & O_ACCMODE) == O_RDONLY)
+               posix_flags = FMODE_READ;
+       else if ((flags & O_ACCMODE) == O_WRONLY)
+               posix_flags = FMODE_WRITE;
+       else if ((flags & O_ACCMODE) == O_RDWR) {
+               /* read/write open: ask for both FMODE_READ and
+                  FMODE_WRITE.  This helper builds posix open flags,
+                  so no NT access mask such as GENERIC_ALL applies. */
+               posix_flags = FMODE_READ | FMODE_WRITE;
+       }
+       /* O_CREAT, O_EXCL and O_TRUNC are not forwarded (they took effect on
+          the original open); the O_* bits below are OR'ed in unchanged */
+       if (flags & O_APPEND)
+               posix_flags |= (fmode_t)O_APPEND;
+       if (flags & O_SYNC)
+               posix_flags |= (fmode_t)O_SYNC;
+       if (flags & O_DIRECTORY)
+               posix_flags |= (fmode_t)O_DIRECTORY;
+       if (flags & O_NOFOLLOW)
+               posix_flags |= (fmode_t)O_NOFOLLOW;
+       if (flags & O_DIRECT)
+               posix_flags |= (fmode_t)O_DIRECT;
+
+       return posix_flags;
 }
 
 static inline int cifs_get_disposition(unsigned int flags)
@@ -93,6 +125,94 @@ static inline int cifs_get_disposition(unsigned int flags)
 }
 
 /* all arguments to this function must be checked for validity in caller */
+static inline int cifs_posix_open_inode_helper(struct inode *inode,
+                       struct file *file, struct cifsInodeInfo *pCifsInode,
+                       struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
+{
+       /* set the inode's oplock caching flags after a posix open */
+       write_lock(&GlobalSMBSeslock);
+       /* NB: the pCifsInode argument is overwritten here */
+       pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
+       if (pCifsInode == NULL) {
+               write_unlock(&GlobalSMBSeslock);
+               return -EINVAL;
+       }
+
+       if (pCifsInode->clientCanCacheRead) {
+               /* we have the inode open somewhere else, so there is
+                  no need to discard cache data */
+               goto psx_client_can_cache;
+       }
+
+       /* BB FIXME: move this check earlier, into posix_open, and fix
+          the disabled section below */
+
+       /* if not oplocked, invalidate inode pages if mtime or file
+          size changed */
+/*     temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
+       if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
+                          (file->f_path.dentry->d_inode->i_size ==
+                           (loff_t)le64_to_cpu(buf->EndOfFile))) {
+               cFYI(1, ("inode unchanged on server"));
+       } else {
+               if (file->f_path.dentry->d_inode->i_mapping) {
+                       rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+                       if (rc != 0)
+                               CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+               }
+               cFYI(1, ("invalidating remote inode since open detected it "
+                        "changed"));
+               invalidate_remote_inode(file->f_path.dentry->d_inode);
+       } */
+
+psx_client_can_cache:
+       if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+               pCifsInode->clientCanCacheAll = true;
+               pCifsInode->clientCanCacheRead = true;
+               cFYI(1, ("Exclusive Oplock granted on inode %p",
+                        file->f_path.dentry->d_inode));
+       } else if ((oplock & 0xF) == OPLOCK_READ)
+               pCifsInode->clientCanCacheRead = true;
+
+       /* will have to change the unlock if we reenable the
+          filemap_fdatawrite (which does not seem necessary) */
+       write_unlock(&GlobalSMBSeslock);
+       return 0;
+}
+
+static struct cifsFileInfo *
+cifs_fill_filedata(struct file *file)
+{
+       struct list_head *tmp;
+       struct cifsFileInfo *pCifsFile = NULL;
+       struct cifsInodeInfo *pCifsInode = NULL;
+
+       /* find an unclaimed handle (pfile == NULL) of this tgid; bind it to file */
+       pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
+       read_lock(&GlobalSMBSeslock);
+       list_for_each(tmp, &pCifsInode->openFileList) {
+               pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
+               if ((pCifsFile->pfile == NULL) &&
+                   (pCifsFile->pid == current->tgid)) {
+                       /* mode set in cifs_create */
+                       /* claim the entry: pfile is needed for writepage,
+                          private_data is read back by later file ops */
+                       pCifsFile->pfile = file;
+                       file->private_data = pCifsFile;
+                       break;
+               }
+       }
+       read_unlock(&GlobalSMBSeslock);
+       /* NOTE(review): assumes private_data was NULL on entry - confirm */
+       if (file->private_data != NULL) {
+               return pCifsFile;
+       } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
+                       cERROR(1, ("could not find file instance for "
+                                  "new file %p", file));
+       return NULL;
+}
+
+/* all arguments to this function must be checked for validity in caller */
 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
        struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
        struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
@@ -103,7 +223,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 
        /* want handles we can use to read with first
           in the list so we do not have to walk the
-          list to search for one in prepare_write */
+          list to search for one in write_begin */
        if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
                list_add_tail(&pCifsFile->flist,
                              &pCifsInode->openFileList);
@@ -130,7 +250,9 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
                if (file->f_path.dentry->d_inode->i_mapping) {
                /* BB no need to lock inode until after invalidate
                   since namei code should already have it locked? */
-                       filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+                       rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+                       if (rc != 0)
+                               CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
                }
                cFYI(1, ("invalidating remote inode since open detected it "
                         "changed"));
@@ -143,15 +265,15 @@ client_can_cache:
                        full_path, inode->i_sb, xid);
        else
                rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
-                       full_path, buf, inode->i_sb, xid);
+                       full_path, buf, inode->i_sb, xid, NULL);
 
        if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-               pCifsInode->clientCanCacheAll = TRUE;
-               pCifsInode->clientCanCacheRead = TRUE;
+               pCifsInode->clientCanCacheAll = true;
+               pCifsInode->clientCanCacheRead = true;
                cFYI(1, ("Exclusive Oplock granted on inode %p",
                         file->f_path.dentry->d_inode));
        } else if ((*oplock & 0xF) == OPLOCK_READ)
-               pCifsInode->clientCanCacheRead = TRUE;
+               pCifsInode->clientCanCacheRead = true;
 
        return rc;
 }
@@ -161,10 +283,9 @@ int cifs_open(struct inode *inode, struct file *file)
        int rc = -EACCES;
        int xid, oplock;
        struct cifs_sb_info *cifs_sb;
-       struct cifsTconInfo *pTcon;
+       struct cifsTconInfo *tcon;
        struct cifsFileInfo *pCifsFile;
        struct cifsInodeInfo *pCifsInode;
-       struct list_head *tmp;
        char *full_path = NULL;
        int desiredAccess;
        int disposition;
@@ -174,36 +295,13 @@ int cifs_open(struct inode *inode, struct file *file)
        xid = GetXid();
 
        cifs_sb = CIFS_SB(inode->i_sb);
-       pTcon = cifs_sb->tcon;
+       tcon = cifs_sb->tcon;
 
-       if (file->f_flags & O_CREAT) {
-               /* search inode for this file and fill in file->private_data */
-               pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-               read_lock(&GlobalSMBSeslock);
-               list_for_each(tmp, &pCifsInode->openFileList) {
-                       pCifsFile = list_entry(tmp, struct cifsFileInfo,
-                                              flist);
-                       if ((pCifsFile->pfile == NULL) &&
-                           (pCifsFile->pid == current->tgid)) {
-                               /* mode set in cifs_create */
-
-                               /* needed for writepage */
-                               pCifsFile->pfile = file;
-
-                               file->private_data = pCifsFile;
-                               break;
-                       }
-               }
-               read_unlock(&GlobalSMBSeslock);
-               if (file->private_data != NULL) {
-                       rc = 0;
-                       FreeXid(xid);
-                       return rc;
-               } else {
-                       if (file->f_flags & O_EXCL)
-                               cERROR(1, ("could not find file instance for "
-                                          "new file %p", file));
-               }
+       pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
+       pCifsFile = cifs_fill_filedata(file);
+       if (pCifsFile) {
+               FreeXid(xid);
+               return 0;
        }
 
        full_path = build_path_from_dentry(file->f_path.dentry);
@@ -214,6 +312,46 @@ int cifs_open(struct inode *inode, struct file *file)
 
        cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path));
+
+       if (oplockEnabled)
+               oplock = REQ_OPLOCK;
+       else
+               oplock = 0;
+
+       if (!tcon->broken_posix_open && tcon->unix_ext &&
+           (tcon->ses->capabilities & CAP_UNIX) &&
+           (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+                       le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+               int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+               /* can not refresh inode info since size could be stale */
+               rc = cifs_posix_open(full_path, &inode, inode->i_sb,
+                                    cifs_sb->mnt_file_mode /* ignored */,
+                                    oflags, &oplock, &netfid, xid);
+               if (rc == 0) {
+                       cFYI(1, ("posix open succeeded"));
+                       /* no need for special case handling of setting mode
+                          on read only files needed here */
+
+                       pCifsFile = cifs_fill_filedata(file);
+                       cifs_posix_open_inode_helper(inode, file, pCifsInode,
+                                                    pCifsFile, oplock, netfid);
+                       goto out;
+               } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+                       if (tcon->ses->serverNOS)
+                               cERROR(1, ("server %s of type %s returned"
+                                          " unexpected error on SMB posix open"
+                                          ", disabling posix open support."
+                                          " Check if server update available.",
+                                          tcon->ses->serverName,
+                                          tcon->ses->serverNOS));
+                       tcon->broken_posix_open = true;
+               } else if ((rc != -EIO) && (rc != -EREMOTE) &&
+                        (rc != -EOPNOTSUPP)) /* path not found or net err */
+                       goto out;
+               /* else fallthrough to retry open the old way on network i/o
+                  or DFS errors */
+       }
+
        desiredAccess = cifs_convert_flags(file->f_flags);
 
 /*********************************************************************
@@ -242,11 +380,6 @@ int cifs_open(struct inode *inode, struct file *file)
 
        disposition = cifs_get_disposition(file->f_flags);
 
-       if (oplockEnabled)
-               oplock = REQ_OPLOCK;
-       else
-               oplock = FALSE;
-
        /* BB pass O_SYNC flag through on file attributes .. BB */
 
        /* Also refresh inode by passing in file_info buf returned by SMBOpen
@@ -263,7 +396,7 @@ int cifs_open(struct inode *inode, struct file *file)
        }
 
        if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
-               rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
+               rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
                         desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
                                 & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -272,7 +405,7 @@ int cifs_open(struct inode *inode, struct file *file)
 
        if (rc == -EIO) {
                /* Old server, try legacy style OpenX */
-               rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
+               rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
                        desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
                        cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
                                & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -289,12 +422,12 @@ int cifs_open(struct inode *inode, struct file *file)
        }
        pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
        write_lock(&GlobalSMBSeslock);
-       list_add(&pCifsFile->tlist, &pTcon->openFileList);
+       list_add(&pCifsFile->tlist, &tcon->openFileList);
 
        pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
        if (pCifsInode) {
                rc = cifs_open_inode_helper(inode, file, pCifsInode,
-                                           pCifsFile, pTcon,
+                                           pCifsFile, tcon,
                                            &oplock, buf, full_path, xid);
        } else {
                write_unlock(&GlobalSMBSeslock);
@@ -303,19 +436,20 @@ int cifs_open(struct inode *inode, struct file *file)
        if (oplock & CIFS_CREATE_ACTION) {
                /* time to set mode which we can not set earlier due to
                   problems creating new read-only files */
-               if (pTcon->unix_ext) {
-                       CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-                                           inode->i_mode,
-                                           (__u64)-1, (__u64)-1, 0 /* dev */,
+               if (tcon->unix_ext) {
+                       struct cifs_unix_set_info_args args = {
+                               .mode   = inode->i_mode,
+                               .uid    = NO_CHANGE_64,
+                               .gid    = NO_CHANGE_64,
+                               .ctime  = NO_CHANGE_64,
+                               .atime  = NO_CHANGE_64,
+                               .mtime  = NO_CHANGE_64,
+                               .device = 0,
+                       };
+                       CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
                                            cifs_sb->local_nls,
                                            cifs_sb->mnt_cifs_flags &
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-               } else {
-                       /* BB implement via Windows security descriptors eg
-                          CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
-                                             -1, -1, local_nls);
-                          in the meantime could set r/o dos attribute when
-                          perms are eg: mode & 0222 == 0 */
                }
        }
 
@@ -337,12 +471,12 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile)
        return rc;
 }
 
-static int cifs_reopen_file(struct file *file, int can_flush)
+static int cifs_reopen_file(struct file *file, bool can_flush)
 {
        int rc = -EACCES;
        int xid, oplock;
        struct cifs_sb_info *cifs_sb;
-       struct cifsTconInfo *pTcon;
+       struct cifsTconInfo *tcon;
        struct cifsFileInfo *pCifsFile;
        struct cifsInodeInfo *pCifsInode;
        struct inode *inode;
@@ -351,15 +485,15 @@ static int cifs_reopen_file(struct file *file, int can_flush)
        int disposition = FILE_OPEN;
        __u16 netfid;
 
-       if (file->private_data) {
+       if (file->private_data)
                pCifsFile = (struct cifsFileInfo *)file->private_data;
-       else
+       else
                return -EBADF;
 
        xid = GetXid();
-       down(&pCifsFile->fh_sem);
-       if (pCifsFile->invalidHandle == FALSE) {
-               up(&pCifsFile->fh_sem);
+       mutex_lock(&pCifsFile->fh_mutex); /* held while handle is reopened */
+       if (!pCifsFile->invalidHandle) {
+               mutex_unlock(&pCifsFile->fh_mutex);
                FreeXid(xid);
                return 0;
        }
@@ -380,7 +514,7 @@ static int cifs_reopen_file(struct file *file, int can_flush)
        }
 
        cifs_sb = CIFS_SB(inode->i_sb);
-       pTcon = cifs_sb->tcon;
+       tcon = cifs_sb->tcon;
 
 /* can not grab rename sem here because various ops, including
    those that already have the rename sem can end up causing writepage
@@ -390,19 +524,36 @@ static int cifs_reopen_file(struct file *file, int can_flush)
        if (full_path == NULL) {
                rc = -ENOMEM;
 reopen_error_exit:
-               up(&pCifsFile->fh_sem);
+               mutex_unlock(&pCifsFile->fh_mutex);
                FreeXid(xid);
                return rc;
        }
 
        cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
                 inode, file->f_flags, full_path));
-       desiredAccess = cifs_convert_flags(file->f_flags);
 
        if (oplockEnabled)
                oplock = REQ_OPLOCK;
        else
-               oplock = FALSE;
+               oplock = 0;
+
+       if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+           (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+                       le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+               int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+               /* can not refresh inode info since size could be stale */
+               rc = cifs_posix_open(full_path, NULL, inode->i_sb,
+                                    cifs_sb->mnt_file_mode /* ignored */,
+                                    oflags, &oplock, &netfid, xid);
+               if (rc == 0) {
+                       cFYI(1, ("posix reopen succeeded"));
+                       goto reopen_success;
+               }
+               /* fallthrough to retry open the old way on errors, especially
+                  in the reconnect path it is important to retry hard */
+       }
+
+       desiredAccess = cifs_convert_flags(file->f_flags);
 
        /* Can not refresh inode by passing in file_info buf to be returned
           by SMBOpen and then calling get_inode_info with returned buf
@@ -410,33 +561,36 @@ reopen_error_exit:
           and server version of file size can be stale. If we knew for sure
           that inode was not dirty locally we could do this */
 
-       rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess,
+       rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
                         CREATE_NOT_DIR, &netfid, &oplock, NULL,
                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
                                CIFS_MOUNT_MAP_SPECIAL_CHR);
        if (rc) {
-               up(&pCifsFile->fh_sem);
+               mutex_unlock(&pCifsFile->fh_mutex);
                cFYI(1, ("cifs_open returned 0x%x", rc));
                cFYI(1, ("oplock: %d", oplock));
        } else {
+reopen_success:
                pCifsFile->netfid = netfid;
-               pCifsFile->invalidHandle = FALSE;
-               up(&pCifsFile->fh_sem);
+               pCifsFile->invalidHandle = false;
+               mutex_unlock(&pCifsFile->fh_mutex);
                pCifsInode = CIFS_I(inode);
                if (pCifsInode) {
                        if (can_flush) {
-                               filemap_write_and_wait(inode->i_mapping);
+                               rc = filemap_write_and_wait(inode->i_mapping);
+                               if (rc != 0)
+                                       CIFS_I(inode)->write_behind_rc = rc;
                        /* temporarily disable caching while we
                           go to server to get inode info */
-                               pCifsInode->clientCanCacheAll = FALSE;
-                               pCifsInode->clientCanCacheRead = FALSE;
-                               if (pTcon->unix_ext)
+                               pCifsInode->clientCanCacheAll = false;
+                               pCifsInode->clientCanCacheRead = false;
+                               if (tcon->unix_ext)
                                        rc = cifs_get_inode_info_unix(&inode,
                                                full_path, inode->i_sb, xid);
                                else
                                        rc = cifs_get_inode_info(&inode,
                                                full_path, NULL, inode->i_sb,
-                                               xid);
+                                               xid, NULL);
                        } /* else we are writing out data to server already
                             and could deadlock if we tried to flush data, and
                             since we do not know if we have data that would
@@ -444,21 +598,20 @@ reopen_error_exit:
                             we can not go to the server to get the new inod
                             info */
                        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-                               pCifsInode->clientCanCacheAll = TRUE;
-                               pCifsInode->clientCanCacheRead = TRUE;
+                               pCifsInode->clientCanCacheAll = true;
+                               pCifsInode->clientCanCacheRead = true;
                                cFYI(1, ("Exclusive Oplock granted on inode %p",
                                         file->f_path.dentry->d_inode));
                        } else if ((oplock & 0xF) == OPLOCK_READ) {
-                               pCifsInode->clientCanCacheRead = TRUE;
-                               pCifsInode->clientCanCacheAll = FALSE;
+                               pCifsInode->clientCanCacheRead = true;
+                               pCifsInode->clientCanCacheAll = false;
                        } else {
-                               pCifsInode->clientCanCacheRead = FALSE;
-                               pCifsInode->clientCanCacheAll = FALSE;
+                               pCifsInode->clientCanCacheRead = false;
+                               pCifsInode->clientCanCacheAll = false;
                        }
                        cifs_relock_file(pCifsFile);
                }
        }
-
        kfree(full_path);
        FreeXid(xid);
        return rc;
@@ -467,7 +620,7 @@ reopen_error_exit:
 int cifs_close(struct inode *inode, struct file *file)
 {
        int rc = 0;
-       int xid;
+       int xid, timeout;
        struct cifs_sb_info *cifs_sb;
        struct cifsTconInfo *pTcon;
        struct cifsFileInfo *pSMBFile =
@@ -479,15 +632,16 @@ int cifs_close(struct inode *inode, struct file *file)
        pTcon = cifs_sb->tcon;
        if (pSMBFile) {
                struct cifsLockInfo *li, *tmp;
-
-               pSMBFile->closePend = TRUE;
+               write_lock(&GlobalSMBSeslock);
+               pSMBFile->closePend = true;
                if (pTcon) {
                        /* no sense reconnecting to close a file that is
                           already closed */
-                       if (pTcon->tidStatus != CifsNeedReconnect) {
-                               int timeout = 2;
+                       if (!pTcon->need_reconnect) {
+                               write_unlock(&GlobalSMBSeslock);
+                               timeout = 2;
                                while ((atomic_read(&pSMBFile->wrtPending) != 0)
-                                        && (timeout < 1000) ) {
+                                       && (timeout <= 2048)) {
                                        /* Give write a better chance to get to
                                        server ahead of the close.  We do not
                                        want to add a wait_q here as it would
@@ -495,19 +649,21 @@ int cifs_close(struct inode *inode, struct file *file)
                                        the struct would be in each open file,
                                        but this should give enough time to
                                        clear the socket */
-#ifdef CONFIG_CIFS_DEBUG2
-                                       cFYI(1, ("close delay, write pending"));
-#endif /* DEBUG2 */
+                                       cFYI(DBG2,
+                                               ("close delay, write pending"));
                                        msleep(timeout);
                                        timeout *= 4;
                                }
                                if (atomic_read(&pSMBFile->wrtPending))
-                                       cERROR(1,
-                                               ("close with pending writes"));
-                               rc = CIFSSMBClose(xid, pTcon,
+                                       cERROR(1, ("close with pending write"));
+                               if (!pTcon->need_reconnect &&
+                                   !pSMBFile->invalidHandle)
+                                       rc = CIFSSMBClose(xid, pTcon,
                                                  pSMBFile->netfid);
-                       }
-               }
+                       } else
+                               write_unlock(&GlobalSMBSeslock);
+               } else
+                       write_unlock(&GlobalSMBSeslock);
 
                /* Delete any outstanding lock records.
                   We'll lose them when the file is closed anyway. */
@@ -522,19 +678,37 @@ int cifs_close(struct inode *inode, struct file *file)
                list_del(&pSMBFile->flist);
                list_del(&pSMBFile->tlist);
                write_unlock(&GlobalSMBSeslock);
-               kfree(pSMBFile->search_resume_name);
+               timeout = 10;
+               /* We waited above to give the SMBWrite a chance to issue
+                  on the wire (so we do not get SMBWrite returning EBADF
+                  if writepages is racing with close.  Note that writepages
+                  does not specify a file handle, so it is possible for a file
+                  to be opened twice, and the application close the "wrong"
+                  file handle - in these cases we delay long enough to allow
+                  the SMBWrite to get on the wire before the SMB Close.
+                  We allow total wait here over 45 seconds, more than
+                  oplock break time, and more than enough to allow any write
+                  to complete on the server, or to time out on the client */
+               while ((atomic_read(&pSMBFile->wrtPending) != 0)
+                               && (timeout <= 50000)) {
+                       cERROR(1, ("writes pending, delay free of handle"));
+                       msleep(timeout);
+                       timeout *= 8;
+               }
                kfree(file->private_data);
                file->private_data = NULL;
        } else
                rc = -EBADF;
 
+       read_lock(&GlobalSMBSeslock);
        if (list_empty(&(CIFS_I(inode)->openFileList))) {
                cFYI(1, ("closing last open instance for inode %p", inode));
                /* if the file is not open we do not know if we can cache info
                   on this inode, much less write behind and read ahead */
-               CIFS_I(inode)->clientCanCacheRead = FALSE;
-               CIFS_I(inode)->clientCanCacheAll  = FALSE;
+               CIFS_I(inode)->clientCanCacheRead = false;
+               CIFS_I(inode)->clientCanCacheAll  = false;
        }
+       read_unlock(&GlobalSMBSeslock);
        if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
                rc = CIFS_I(inode)->write_behind_rc;
        FreeXid(xid);
@@ -561,15 +735,18 @@ int cifs_closedir(struct inode *inode, struct file *file)
                pTcon = cifs_sb->tcon;
 
                cFYI(1, ("Freeing private data in close dir"));
-               if ((pCFileStruct->srch_inf.endOfSearch == FALSE) &&
-                  (pCFileStruct->invalidHandle == FALSE)) {
-                       pCFileStruct->invalidHandle = TRUE;
+               write_lock(&GlobalSMBSeslock);
+               if (!pCFileStruct->srch_inf.endOfSearch &&
+                   !pCFileStruct->invalidHandle) {
+                       pCFileStruct->invalidHandle = true;
+                       write_unlock(&GlobalSMBSeslock);
                        rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
                        cFYI(1, ("Closing uncompleted readdir with rc %d",
                                 rc));
                        /* not much we can do if it fails anyway, ignore rc */
                        rc = 0;
-               }
+               } else
+                       write_unlock(&GlobalSMBSeslock);
                ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
                if (ptmp) {
                        cFYI(1, ("closedir free smb buf in srch struct"));
@@ -579,12 +756,6 @@ int cifs_closedir(struct inode *inode, struct file *file)
                        else
                                cifs_buf_release(ptmp);
                }
-               ptmp = pCFileStruct->search_resume_name;
-               if (ptmp) {
-                       cFYI(1, ("closedir free resume name"));
-                       pCFileStruct->search_resume_name = NULL;
-                       kfree(ptmp);
-               }
                kfree(file->private_data);
                file->private_data = NULL;
        }
@@ -615,12 +786,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
        __u32 numLock = 0;
        __u32 numUnlock = 0;
        __u64 length;
-       int wait_flag = FALSE;
+       bool wait_flag = false;
        struct cifs_sb_info *cifs_sb;
-       struct cifsTconInfo *pTcon;
+       struct cifsTconInfo *tcon;
        __u16 netfid;
        __u8 lockType = LOCKING_ANDX_LARGE_FILES;
-       int posix_locking;
+       bool posix_locking = 0;
 
        length = 1 + pfLock->fl_end - pfLock->fl_start;
        rc = -EACCES;
@@ -637,7 +808,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                cFYI(1, ("Flock"));
        if (pfLock->fl_flags & FL_SLEEP) {
                cFYI(1, ("Blocking lock"));
-               wait_flag = TRUE;
+               wait_flag = true;
        }
        if (pfLock->fl_flags & FL_ACCESS)
                cFYI(1, ("Process suspended by mandatory locking - "
@@ -671,7 +842,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                cFYI(1, ("Unknown type of lock"));
 
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-       pTcon = cifs_sb->tcon;
+       tcon = cifs_sb->tcon;
 
        if (file->private_data == NULL) {
                FreeXid(xid);
@@ -679,9 +850,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
        }
        netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
 
-       posix_locking = (cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
-                       (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability));
-
+       if ((tcon->ses->capabilities & CAP_UNIX) &&
+           (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+           ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+               posix_locking = 1;
        /* BB add code here to normalize offset and length to
        account for negative length which we can not accept over the
        wire */
@@ -692,7 +864,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                                posix_lock_type = CIFS_RDLCK;
                        else
                                posix_lock_type = CIFS_WRLCK;
-                       rc = CIFSSMBPosixLock(xid, pTcon, netfid, 1 /* get */,
+                       rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
                                        length, pfLock,
                                        posix_lock_type, wait_flag);
                        FreeXid(xid);
@@ -700,10 +872,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                }
 
                /* BB we could chain these into one lock request BB */
-               rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start,
+               rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
                                 0, 1, lockType, 0 /* wait flag */ );
                if (rc == 0) {
-                       rc = CIFSSMBLock(xid, pTcon, netfid, length,
+                       rc = CIFSSMBLock(xid, tcon, netfid, length,
                                         pfLock->fl_start, 1 /* numUnlock */ ,
                                         0 /* numLock */ , lockType,
                                         0 /* wait flag */ );
@@ -740,7 +912,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                if (numUnlock == 1)
                        posix_lock_type = CIFS_UNLCK;
 
-               rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */,
+               rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
                                      length, pfLock,
                                      posix_lock_type, wait_flag);
        } else {
@@ -748,7 +920,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                        (struct cifsFileInfo *)file->private_data;
 
                if (numLock) {
-                       rc = CIFSSMBLock(xid, pTcon, netfid, length,
+                       rc = CIFSSMBLock(xid, tcon, netfid, length,
                                        pfLock->fl_start,
                                        0, numLock, lockType, wait_flag);
 
@@ -767,12 +939,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                        mutex_lock(&fid->lock_mutex);
                        list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
                                if (pfLock->fl_start <= li->offset &&
-                                               (pflock->fl_start + length) >=
+                                               (pfLock->fl_start + length) >=
                                                (li->offset + li->length)) {
-                                       stored_rc = CIFSSMBLock(xid, pTcon,
+                                       stored_rc = CIFSSMBLock(xid, tcon,
                                                        netfid,
                                                        li->length, li->offset,
-                                                       1, 0, li->type, FALSE);
+                                                       1, 0, li->type, false);
                                        if (stored_rc)
                                                rc = stored_rc;
 
@@ -790,6 +962,40 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
        return rc;
 }
 
+/*
+ * Set the timeout on write requests past EOF. For some servers (Windows)
+ * these calls can be very long.
+ *
+ * If we're writing >10M past the EOF we give a 180s timeout. Anything less
+ * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
+ * The 10M cutoff is totally arbitrary. A better scheme for this would be
+ * welcome if someone wants to suggest one.
+ *
+ * We may be able to do a better job with this if there were some way to
+ * declare that a file should be sparse.
+ */
+static int
+cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
+{
+       /* write starts at or before the server EOF: standard timeout */
+       if (offset <= cifsi->server_eof)
+               return CIFS_STD_OP;
+       /* past EOF: very long timeout only beyond the 10M cutoff */
+       return (offset > (cifsi->server_eof + (10 * 1024 * 1024))) ?
+               CIFS_VLONG_OP : CIFS_LONG_OP;
+}
+
+/* bump the cached server EOF when a write ended beyond it */
+static void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
+                           unsigned int bytes_written)
+{
+       loff_t new_eof = offset + bytes_written;
+
+       if (new_eof <= cifsi->server_eof)
+               return; /* write ended within the known size */
+       cifsi->server_eof = new_eof;
+}
+
 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        size_t write_size, loff_t *poffset)
 {
@@ -800,6 +1006,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        struct cifsTconInfo *pTcon;
        int xid, long_op;
        struct cifsFileInfo *open_file;
+       struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
 
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 
@@ -813,13 +1020,13 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
                return -EBADF;
        open_file = (struct cifsFileInfo *) file->private_data;
 
-       xid = GetXid();
+       rc = generic_write_checks(file, poffset, &write_size, 0);
+       if (rc)
+               return rc;
 
-       if (*poffset > file->f_path.dentry->d_inode->i_size)
-               long_op = 2; /* writes past end of file can take a long time */
-       else
-               long_op = 1;
+       xid = GetXid();
 
+       long_op = cifs_write_timeout(cifsi, *poffset);
        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
@@ -844,7 +1051,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to server
                                   now */
-                               rc = cifs_reopen_file(file, FALSE);
+                               rc = cifs_reopen_file(file, false);
                                if (rc != 0)
                                        break;
                        }
@@ -863,9 +1070,11 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
                                FreeXid(xid);
                                return rc;
                        }
-               } else
+               } else {
+                       cifs_update_eof(cifsi, *poffset, bytes_written);
                        *poffset += bytes_written;
-               long_op = FALSE; /* subsequent writes fast -
+               }
+               long_op = CIFS_STD_OP; /* subsequent writes fast -
                                    15 seconds is plenty */
        }
 
@@ -891,7 +1100,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 }
 
 static ssize_t cifs_write(struct file *file, const char *write_data,
-       size_t write_size, loff_t *poffset)
+                         size_t write_size, loff_t *poffset)
 {
        int rc = 0;
        unsigned int bytes_written = 0;
@@ -900,6 +1109,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
        struct cifsTconInfo *pTcon;
        int xid, long_op;
        struct cifsFileInfo *open_file;
+       struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
 
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 
@@ -914,11 +1124,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 
        xid = GetXid();
 
-       if (*poffset > file->f_path.dentry->d_inode->i_size)
-               long_op = 2; /* writes past end of file can take a long time */
-       else
-               long_op = 1;
-
+       long_op = cifs_write_timeout(cifsi, *poffset);
        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
@@ -944,7 +1150,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
-                               rc = cifs_reopen_file(file, FALSE);
+                               rc = cifs_reopen_file(file, false);
                                if (rc != 0)
                                        break;
                        }
@@ -981,9 +1187,11 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
                                FreeXid(xid);
                                return rc;
                        }
-               } else
+               } else {
+                       cifs_update_eof(cifsi, *poffset, bytes_written);
                        *poffset += bytes_written;
-               long_op = FALSE; /* subsequent writes fast -
+               }
+               long_op = CIFS_STD_OP; /* subsequent writes fast -
                                    15 seconds is plenty */
        }
 
@@ -1007,9 +1215,41 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
        return total_written;
 }
 
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+/* Find an open handle on this inode that can be read from.  Returns it with
+   wrtPending incremented, or NULL if no valid readable handle is found */
+struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
+{
+       struct cifsFileInfo *open_file = NULL;
+
+       read_lock(&GlobalSMBSeslock);
+       /* write-only entries are always at the end of the list, but the first
+          entry might have a close pending, so walk the whole list */
+       list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+               if (open_file->closePend)
+                       continue;
+               /* O_RDONLY is 0 so it can not be tested as a bit: compare
+                  the access mode instead (readable == not write-only) */
+               if (open_file->pfile &&
+                   (open_file->pfile->f_flags & O_ACCMODE) != O_WRONLY) {
+                       if (!open_file->invalidHandle) {
+                               /* pin it so it is not closed on us */
+                               atomic_inc(&open_file->wrtPending);
+                               read_unlock(&GlobalSMBSeslock);
+                               return open_file;
+                       } /* else keep looking, or let the caller reopen */
+               } else /* write only file */
+                       break; /* write only files are last so must be done */
+       }
+       read_unlock(&GlobalSMBSeslock);
+       return NULL;
+}
+#endif
+
 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
 {
        struct cifsFileInfo *open_file;
+       bool any_available = false;
        int rc;
 
        /* Having a null inode here (because mapping->host was set to zero by
@@ -1023,33 +1263,66 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
        }
 
        read_lock(&GlobalSMBSeslock);
+refind_writable:
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
-               if (open_file->closePend)
+               if (open_file->closePend ||
+                   (!any_available && open_file->pid != current->tgid))
                        continue;
+
                if (open_file->pfile &&
                    ((open_file->pfile->f_flags & O_RDWR) ||
                     (open_file->pfile->f_flags & O_WRONLY))) {
                        atomic_inc(&open_file->wrtPending);
+
+                       if (!open_file->invalidHandle) {
+                               /* found a good writable file */
+                               read_unlock(&GlobalSMBSeslock);
+                               return open_file;
+                       }
+
                        read_unlock(&GlobalSMBSeslock);
-                       if ((open_file->invalidHandle) &&
-                          (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
-                               rc = cifs_reopen_file(open_file->pfile, FALSE);
-                               /* if it fails, try another handle - might be */
-                               /* dangerous to hold up writepages with retry */
-                               if (rc) {
-                                       cFYI(1,
-                                             ("failed on reopen file in wp"));
+                       /* Had to unlock since following call can block */
+                       rc = cifs_reopen_file(open_file->pfile, false);
+                       if (!rc) {
+                               if (!open_file->closePend)
+                                       return open_file;
+                               else { /* start over in case this was deleted */
+                                      /* since the list could be modified */
                                        read_lock(&GlobalSMBSeslock);
-                                       /* can not use this handle, no write
-                                       pending on this one after all */
-                                       atomic_dec
-                                            (&open_file->wrtPending);
-                                       continue;
+                                       atomic_dec(&open_file->wrtPending);
+                                       goto refind_writable;
                                }
                        }
-                       return open_file;
+
+                       /* if it fails, try another handle if possible -
+                       (we can not do this if closePending since
+                       loop could be modified - in which case we
+                       have to start at the beginning of the list
+                       again. Note that it would be bad
+                       to hold up writepages here (rather than
+                       in caller) with continuous retries */
+                       cFYI(1, ("wp failed on reopen file"));
+                       read_lock(&GlobalSMBSeslock);
+                       /* can not use this handle, no write
+                          pending on this one after all */
+                       atomic_dec(&open_file->wrtPending);
+
+                       if (open_file->closePend) /* list could have changed */
+                               goto refind_writable;
+                       /* else we simply continue to the next entry. Thus
+                          we do not loop on reopen errors.  If we
+                          can not reopen the file, for example if we
+                          reconnected to a server with another client
+                          racing to delete or lock the file we would not
+                          make progress if we restarted before the beginning
+                          of the loop here. */
                }
        }
+       /* couldn't find useable FH with same pid, try any available */
+       if (!any_available) {
+               any_available = true;
+               goto refind_writable;
+       }
        read_unlock(&GlobalSMBSeslock);
        return NULL;
 }
@@ -1099,12 +1372,10 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
                atomic_dec(&open_file->wrtPending);
                /* Does mm or vfs already set times? */
                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
-               if ((bytes_written > 0) && (offset)) {
+               if ((bytes_written > 0) && (offset))
                        rc = 0;
-               } else if (bytes_written < 0) {
-                       if (rc != -EBADF)
-                               rc = bytes_written;
-               }
+               else if (bytes_written < 0)
+                       rc = bytes_written;
        } else {
                cFYI(1, ("No writeable filehandles for inode"));
                rc = -EIO;
@@ -1132,11 +1403,12 @@ static int cifs_writepages(struct address_space *mapping,
        int nr_pages;
        __u64 offset = 0;
        struct cifsFileInfo *open_file;
+       struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
        struct page *page;
        struct pagevec pvec;
        int rc = 0;
        int scanned = 0;
-       int xid;
+       int xid, long_op;
 
        cifs_sb = CIFS_SB(mapping->host->i_sb);
 
@@ -1206,7 +1478,7 @@ retry:
 
                        if (first < 0)
                                lock_page(page);
-                       else if (TestSetPageLocked(page))
+                       else if (!trylock_page(page))
                                break;
 
                        if (unlikely(page->mapping != mapping)) {
@@ -1280,18 +1552,24 @@ retry:
                                cERROR(1, ("No writable handles for inode"));
                                rc = -EBADF;
                        } else {
+                               long_op = cifs_write_timeout(cifsi, offset);
                                rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
                                                   open_file->netfid,
                                                   bytes_to_write, offset,
                                                   &bytes_written, iov, n_iov,
-                                                  1);
+                                                  long_op);
                                atomic_dec(&open_file->wrtPending);
+                               cifs_update_eof(cifsi, offset, bytes_written);
+
                                if (rc || bytes_written < bytes_to_write) {
                                        cERROR(1, ("Write2 ret %d, wrote %d",
                                                  rc, bytes_written));
                                        /* BB what if continued retry is
                                           requested via mount flags? */
-                                       set_bit(AS_EIO, &mapping->flags);
+                                       if (rc == -ENOSPC)
+                                               set_bit(AS_ENOSPC, &mapping->flags);
+                                       else
+                                               set_bit(AS_EIO, &mapping->flags);
                                } else {
                                        cifs_stats_bytes_written(cifs_sb->tcon,
                                                                 bytes_written);
@@ -1314,7 +1592,10 @@ retry:
                        if ((wbc->nr_to_write -= n_iov) <= 0)
                                done = 1;
                        index = next;
-               }
+               } else
+                       /* Need to re-find the pages we skipped */
+                       index = pvec.pages[0]->index + 1;
+
                pagevec_release(&pvec);
        }
        if (!scanned && !done) {
@@ -1342,9 +1623,8 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
        xid = GetXid();
 /* BB add check for wbc flags */
        page_cache_get(page);
-       if (!PageUptodate(page)) {
+       if (!PageUptodate(page))
                cFYI(1, ("ppw - page not up to date"));
-       }
 
        /*
         * Set the "writeback" flag, and clear "dirty" in the radix tree.
@@ -1366,49 +1646,56 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
        return rc;
 }
 
-static int cifs_commit_write(struct file *file, struct page *page,
-       unsigned offset, unsigned to)
+static int cifs_write_end(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned copied,
+                       struct page *page, void *fsdata)
 {
-       int xid;
-       int rc = 0;
-       struct inode *inode = page->mapping->host;
-       loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-       char *page_data;
+       int rc;
+       struct inode *inode = mapping->host;
+
+       cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
+                page, pos, copied));
+
+       if (PageChecked(page)) {
+               if (copied == len)
+                       SetPageUptodate(page);
+               ClearPageChecked(page);
+       } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+               SetPageUptodate(page);
 
-       xid = GetXid();
-       cFYI(1, ("commit write for page %p up to position %lld for %d",
-                page, position, to));
-       spin_lock(&inode->i_lock);
-       if (position > inode->i_size) {
-               i_size_write(inode, position);
-       }
-       spin_unlock(&inode->i_lock);
        if (!PageUptodate(page)) {
-               position =  ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset;
-               /* can not rely on (or let) writepage write this data */
-               if (to < offset) {
-                       cFYI(1, ("Illegal offsets, can not copy from %d to %d",
-                               offset, to));
-                       FreeXid(xid);
-                       return rc;
-               }
+               char *page_data;
+               unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+               int xid;
+
+               xid = GetXid();
                /* this is probably better than directly calling
                   partialpage_write since in this function the file handle is
                   known which we might as well leverage */
                /* BB check if anything else missing out of ppw
                   such as updating last write time */
                page_data = kmap(page);
-               rc = cifs_write(file, page_data + offset, to-offset,
-                               &position);
-               if (rc > 0)
-                       rc = 0;
-               /* else if (rc < 0) should we set writebehind rc? */
+               rc = cifs_write(file, page_data + offset, copied, &pos);
+               /* if (rc < 0) should we set writebehind rc? */
                kunmap(page);
+
+               FreeXid(xid);
        } else {
+               rc = copied;
+               pos += copied;
                set_page_dirty(page);
        }
 
-       FreeXid(xid);
+       if (rc > 0) {
+               spin_lock(&inode->i_lock);
+               if (pos > inode->i_size)
+                       i_size_write(inode, pos);
+               spin_unlock(&inode->i_lock);
+       }
+
+       unlock_page(page);
+       page_cache_release(page);
+
        return rc;
 }
 
@@ -1416,6 +1703,9 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
        int xid;
        int rc = 0;
+       struct cifsTconInfo *tcon;
+       struct cifsFileInfo *smbfile =
+               (struct cifsFileInfo *)file->private_data;
        struct inode *inode = file->f_path.dentry->d_inode;
 
        xid = GetXid();
@@ -1423,9 +1713,16 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
        cFYI(1, ("Sync file - name: %s datasync: 0x%x",
                dentry->d_name.name, datasync));
 
-       rc = filemap_fdatawrite(inode->i_mapping);
-       if (rc == 0)
+       rc = filemap_write_and_wait(inode->i_mapping);
+       if (rc == 0) {
+               rc = CIFS_I(inode)->write_behind_rc;
                CIFS_I(inode)->write_behind_rc = 0;
+               tcon = CIFS_SB(inode->i_sb)->tcon;
+               if (!rc && tcon && smbfile &&
+                  !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+                       rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
+       }
+
        FreeXid(xid);
        return rc;
 }
@@ -1477,8 +1774,11 @@ int cifs_flush(struct file *file, fl_owner_t id)
           filemapfdatawrite appears easier for the time being */
 
        rc = filemap_fdatawrite(inode->i_mapping);
-       if (!rc) /* reset wb rc if we were able to write out dirty pages */
+       /* reset wb rc if we were able to write out dirty pages */
+       if (!rc) {
+               rc = CIFS_I(inode)->write_behind_rc;
                CIFS_I(inode)->write_behind_rc = 0;
+       }
 
        cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
 
@@ -1510,9 +1810,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
        }
        open_file = (struct cifsFileInfo *)file->private_data;
 
-       if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
+       if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cFYI(1, ("attempting read on write only file instance"));
-       }
+
        for (total_read = 0, current_offset = read_data;
             read_size > total_read;
             total_read += bytes_read, current_offset += bytes_read) {
@@ -1524,7 +1824,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
                        int buf_type = CIFS_NO_BUFFER;
                        if ((open_file->invalidHandle) &&
                            (!open_file->closePend)) {
-                               rc = cifs_reopen_file(file, TRUE);
+                               rc = cifs_reopen_file(file, true);
                                if (rc != 0)
                                        break;
                        }
@@ -1539,9 +1839,8 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
                                                smb_read_data +
                                                4 /* RFC1001 length field */ +
                                                le16_to_cpu(pSMBr->DataOffset),
-                                               bytes_read)) {
+                                               bytes_read))
                                        rc = -EFAULT;
-                               }
 
                                if (buf_type == CIFS_SMALL_BUFFER)
                                        cifs_small_buf_release(smb_read_data);
@@ -1610,7 +1909,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
                while (rc == -EAGAIN) {
                        if ((open_file->invalidHandle) &&
                            (!open_file->closePend)) {
-                               rc = cifs_reopen_file(file, TRUE);
+                               rc = cifs_reopen_file(file, true);
                                if (rc != 0)
                                        break;
                        }
@@ -1695,7 +1994,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
                SetPageUptodate(page);
                unlock_page(page);
                if (!pagevec_add(plru_pvec, page))
-                       __pagevec_lru_add(plru_pvec);
+                       __pagevec_lru_add_file(plru_pvec);
                data += PAGE_CACHE_SIZE;
        }
        return;
@@ -1710,7 +2009,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct page *page;
        struct cifs_sb_info *cifs_sb;
        struct cifsTconInfo *pTcon;
-       int bytes_read = 0;
+       unsigned int bytes_read = 0;
        unsigned int read_size, i;
        char *smb_read_data = NULL;
        struct smb_com_read_rsp *pSMBr;
@@ -1728,9 +2027,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        pTcon = cifs_sb->tcon;
 
        pagevec_init(&lru_pvec, 0);
-#ifdef CONFIG_CIFS_DEBUG2
-               cFYI(1, ("rpages: num pages %d", num_pages));
-#endif
+       cFYI(DBG2, ("rpages: num pages %d", num_pages));
        for (i = 0; i < num_pages; ) {
                unsigned contig_pages;
                struct page *tmp_page;
@@ -1763,15 +2060,13 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                /* Read size needs to be in multiples of one page */
                read_size = min_t(const unsigned int, read_size,
                                  cifs_sb->rsize & PAGE_CACHE_MASK);
-#ifdef CONFIG_CIFS_DEBUG2
-               cFYI(1, ("rpages: read size 0x%x  contiguous pages %d",
+               cFYI(DBG2, ("rpages: read size 0x%x  contiguous pages %d",
                                read_size, contig_pages));
-#endif
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        if ((open_file->invalidHandle) &&
                            (!open_file->closePend)) {
-                               rc = cifs_reopen_file(file, TRUE);
+                               rc = cifs_reopen_file(file, true);
                                if (rc != 0)
                                        break;
                        }
@@ -1804,7 +2099,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 
                        i +=  bytes_read >> PAGE_CACHE_SHIFT;
                        cifs_stats_bytes_read(pTcon, bytes_read);
-                       if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
+                       if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
                                i++; /* account for partial page */
 
                                /* server copy of file can have smaller size
@@ -1833,7 +2128,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                bytes_read = 0;
        }
 
-       pagevec_lru_add(&lru_pvec);
+       pagevec_lru_add_file(&lru_pvec);
 
 /* need to free smb_read_data buf before exit */
        if (smb_read_data) {
@@ -1930,74 +2225,100 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
    refreshing the inode only on increases in the file size
    but this is tricky to do without racing with writebehind
    page caching in the current Linux kernel design */
-int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
+bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
 {
        if (!cifsInode)
-               return 1;
+               return true;
 
        if (is_inode_writable(cifsInode)) {
                /* This inode is open for write at least once */
                struct cifs_sb_info *cifs_sb;
 
                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
-               if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) {
+               if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /* since no page cache to corrupt on directio
                        we can change size safely */
-                       return 1;
+                       return true;
                }
 
                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
-                       return 1;
+                       return true;
 
-               return 0;
+               return false;
        } else
-               return 1;
+               return true;
 }
 
-static int cifs_prepare_write(struct file *file, struct page *page,
-       unsigned from, unsigned to)
+static int cifs_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata)
 {
-       int rc = 0;
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+       loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
+       loff_t page_start = (loff_t)index << PAGE_CACHE_SHIFT; /* 64-bit safe */
        loff_t i_size;
-       loff_t offset;
+       struct page *page;
+       int rc = 0;
 
-       cFYI(1, ("prepare write for page %p from %d to %d", page, from, to));
-       if (PageUptodate(page))
-               return 0;
+       cFYI(1, ("write_begin from %lld len %u", (long long)pos, len));
 
-       /* If we are writing a full page it will be up to date,
-          no need to read from the server */
-       if ((to == PAGE_CACHE_SIZE) && (from == 0)) {
-               SetPageUptodate(page);
-               return 0;
+       page = grab_cache_page_write_begin(mapping, index, flags);
+       if (!page) {
+               rc = -ENOMEM;
+               goto out;
        }
 
-       offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-       i_size = i_size_read(page->mapping->host);
+       if (PageUptodate(page))
+               goto out;
 
-       if ((offset >= i_size) ||
-           ((from == 0) && (offset + to) >= i_size)) {
+       /*
+        * If we write a full page it will be up to date, no need to read from
+        * the server. If the write is short, we'll end up doing a sync write
+        * instead.
+        */
+       if (len == PAGE_CACHE_SIZE)
+               goto out;
+
+       /*
+        * optimize away the read when we have an oplock, and we're not
+        * expecting to use any of the data we'd be reading in. That
+        * is, when the page lies beyond the EOF, or straddles the EOF
+        * and the write will cover all of the existing data.
+        */
+       if (CIFS_I(mapping->host)->clientCanCacheRead) {
+               i_size = i_size_read(mapping->host);
+               if (page_start >= i_size ||
+                   (offset == 0 && (pos + len) >= i_size)) {
+                       zero_user_segments(page, 0, offset,
+                                          offset + len,
+                                          PAGE_CACHE_SIZE);
+                       /*
+                        * PageChecked means that the parts of the page
+                        * to which we're not writing are considered up
+                        * to date. Once the data is copied to the
+                        * page, it can be set uptodate.
+                        */
+                       SetPageChecked(page);
+                       goto out;
+               }
+       }
+
+       if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
                /*
-                * We don't need to read data beyond the end of the file.
-                * zero it, and set the page uptodate
+                * might as well read a page, it is fast enough. If we get
+                * an error, we don't need to return it. cifs_write_end will
+                * do a sync write instead since PG_uptodate isn't set.
                 */
-               simple_prepare_write(file, page, from, to);
-               SetPageUptodate(page);
-       } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
-               /* might as well read a page, it is fast enough */
-               rc = cifs_readpage_worker(file, page, &offset);
+               cifs_readpage_worker(file, page, &page_start);
        } else {
                /* we could try using another file handle if there is one -
                   but how would we lock it to prevent close of that handle
                   racing with this read? In any case
-                  this will be written out by commit_write so is fine */
+                  this will be written out by write_end so is fine */
        }
-
-       /* we do not need to pass errors back
-          e.g. if we do not have read access to the file
-          because cifs_commit_write will do the right thing.  -- shaggy */
-
-       return 0;
+out:
+       *pagep = page;
+       return rc;
 }
 
 const struct address_space_operations cifs_addr_ops = {
@@ -2005,8 +2326,8 @@ const struct address_space_operations cifs_addr_ops = {
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
-       .prepare_write = cifs_prepare_write,
-       .commit_write = cifs_commit_write,
+       .write_begin = cifs_write_begin,
+       .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        /* .sync_page = cifs_sync_page, */
        /* .direct_IO = */
@@ -2021,8 +2342,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
        .readpage = cifs_readpage,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
-       .prepare_write = cifs_prepare_write,
-       .commit_write = cifs_commit_write,
+       .write_begin = cifs_write_begin,
+       .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        /* .sync_page = cifs_sync_page, */
        /* .direct_IO = */