Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux...
[safe/jmp/linux-2.6] / fs / xfs / linux-2.6 / xfs_lrw.c
index 110c038..1bf47f2 100644 (file)
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
 #include "xfs_attr.h"
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_iomap.h"
+#include "xfs_vnodeops.h"
 
 #include <linux/capability.h>
 #include <linux/writeback.h>
 void
 xfs_rw_enter_trace(
        int                     tag,
-       xfs_iocore_t            *io,
+       xfs_inode_t             *ip,
        void                    *data,
        size_t                  segs,
        loff_t                  offset,
        int                     ioflags)
 {
-       xfs_inode_t     *ip = XFS_IO_INODE(io);
-
        if (ip->i_rwtrace == NULL)
                return;
        ktrace_enter(ip->i_rwtrace,
@@ -79,8 +75,8 @@ xfs_rw_enter_trace(
                (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
                (void *)((unsigned long)(offset & 0xffffffff)),
                (void *)((unsigned long)ioflags),
-               (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(io->io_new_size & 0xffffffff)),
+               (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
                (void *)((unsigned long)current_pid()),
                (void *)NULL,
                (void *)NULL,
@@ -90,13 +86,12 @@ xfs_rw_enter_trace(
 
 void
 xfs_inval_cached_trace(
-       xfs_iocore_t    *io,
+       xfs_inode_t     *ip,
        xfs_off_t       offset,
        xfs_off_t       len,
        xfs_off_t       first,
        xfs_off_t       last)
 {
-       xfs_inode_t     *ip = XFS_IO_INODE(io);
 
        if (ip->i_rwtrace == NULL)
                return;
@@ -132,56 +127,38 @@ xfs_inval_cached_trace(
  */
 STATIC int
 xfs_iozero(
-       struct inode            *ip,    /* inode                        */
+       struct xfs_inode        *ip,    /* inode                        */
        loff_t                  pos,    /* offset in file               */
-       size_t                  count,  /* size of data to zero         */
-       loff_t                  end_size)       /* max file size to set */
+       size_t                  count)  /* size of data to zero         */
 {
-       unsigned                bytes;
        struct page             *page;
        struct address_space    *mapping;
-       char                    *kaddr;
        int                     status;
 
-       mapping = ip->i_mapping;
+       mapping = VFS_I(ip)->i_mapping;
        do {
-               unsigned long index, offset;
+               unsigned offset, bytes;
+               void *fsdata;
 
                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-               index = pos >> PAGE_CACHE_SHIFT;
                bytes = PAGE_CACHE_SIZE - offset;
                if (bytes > count)
                        bytes = count;
 
-               status = -ENOMEM;
-               page = grab_cache_page(mapping, index);
-               if (!page)
+               status = pagecache_write_begin(NULL, mapping, pos, bytes,
+                                       AOP_FLAG_UNINTERRUPTIBLE,
+                                       &page, &fsdata);
+               if (status)
                        break;
 
-               kaddr = kmap(page);
-               status = mapping->a_ops->prepare_write(NULL, page, offset,
-                                                       offset + bytes);
-               if (status) {
-                       goto unlock;
-               }
-
-               memset((void *) (kaddr + offset), 0, bytes);
-               flush_dcache_page(page);
-               status = mapping->a_ops->commit_write(NULL, page, offset,
-                                                       offset + bytes);
-               if (!status) {
-                       pos += bytes;
-                       count -= bytes;
-                       if (pos > i_size_read(ip))
-                               i_size_write(ip, pos < end_size ? pos : end_size);
-               }
+               zero_user(page, offset, bytes);
 
-unlock:
-               kunmap(page);
-               unlock_page(page);
-               page_cache_release(page);
-               if (status)
-                       break;
+               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+                                       page, fsdata);
+               WARN_ON(status <= 0); /* can't return less than zero! */
+               pos += bytes;
+               count -= bytes;
+               status = 0;
        } while (count);
 
        return (-status);
@@ -189,27 +166,21 @@ unlock:
 
 ssize_t                        /* bytes read, or (-)  error */
 xfs_read(
-       bhv_desc_t              *bdp,
+       xfs_inode_t             *ip,
        struct kiocb            *iocb,
        const struct iovec      *iovp,
        unsigned int            segs,
        loff_t                  *offset,
-       int                     ioflags,
-       cred_t                  *credp)
+       int                     ioflags)
 {
        struct file             *file = iocb->ki_filp;
        struct inode            *inode = file->f_mapping->host;
+       xfs_mount_t             *mp = ip->i_mount;
        size_t                  size = 0;
-       ssize_t                 ret;
+       ssize_t                 ret = 0;
        xfs_fsize_t             n;
-       xfs_inode_t             *ip;
-       xfs_mount_t             *mp;
-       bhv_vnode_t             *vp;
        unsigned long           seg;
 
-       ip = XFS_BHVTOI(bdp);
-       vp = BHV_TO_VNODE(bdp);
-       mp = ip->i_mount;
 
        XFS_STATS_INC(xs_read_calls);
 
@@ -229,11 +200,11 @@ xfs_read(
 
        if (unlikely(ioflags & IO_ISDIRECT)) {
                xfs_buftarg_t   *target =
-                       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+                       XFS_IS_REALTIME_INODE(ip) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
                if ((*offset & target->bt_smask) ||
                    (size & target->bt_smask)) {
-                       if (*offset == ip->i_d.di_size) {
+                       if (*offset == ip->i_size) {
                                return (0);
                        }
                        return -XFS_ERROR(EINVAL);
@@ -254,14 +225,12 @@ xfs_read(
                mutex_lock(&inode->i_mutex);
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-       if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
-           !(ioflags & IO_INVIS)) {
-               bhv_vrwlock_t locktype = VRWLOCK_READ;
+       if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
                int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+               int iolock = XFS_IOLOCK_SHARED;
 
-               ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
-                                       BHV_TO_VNODE(bdp), *offset, size,
-                                       dmflags, &locktype);
+               ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
+                                       dmflags, &iolock);
                if (ret) {
                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
                        if (unlikely(ioflags & IO_ISDIRECT))
@@ -270,62 +239,22 @@ xfs_read(
                }
        }
 
-       if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp)))
-               bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
-                                               -1, FI_REMAPF_LOCKED);
-
-       if (unlikely(ioflags & IO_ISDIRECT))
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               if (inode->i_mapping->nrpages)
+                       ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
+                                                   -1, FI_REMAPF_LOCKED);
                mutex_unlock(&inode->i_mutex);
-
-       xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
-                               (void *)iovp, segs, *offset, ioflags);
-       ret = __generic_file_aio_read(iocb, iovp, segs, offset);
-       if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
-               ret = wait_on_sync_kiocb(iocb);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-ssize_t
-xfs_sendfile(
-       bhv_desc_t              *bdp,
-       struct file             *filp,
-       loff_t                  *offset,
-       int                     ioflags,
-       size_t                  count,
-       read_actor_t            actor,
-       void                    *target,
-       cred_t                  *credp)
-{
-       xfs_inode_t             *ip = XFS_BHVTOI(bdp);
-       xfs_mount_t             *mp = ip->i_mount;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_read_calls);
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
-       if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-           (!(ioflags & IO_INVIS))) {
-               bhv_vrwlock_t locktype = VRWLOCK_READ;
-               int error;
-
-               error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-                                     *offset, count,
-                                     FILP_DELAY_FLAG(filp), &locktype);
-               if (error) {
+               if (ret) {
                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-                       return -error;
+                       return ret;
                }
        }
-       xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-                  (void *)(unsigned long)target, count, *offset, ioflags);
-       ret = generic_file_sendfile(filp, offset, count, actor, target);
+
+       xfs_rw_enter_trace(XFS_READ_ENTER, ip,
+                               (void *)iovp, segs, *offset, ioflags);
+
+       iocb->ki_pos = *offset;
+       ret = generic_file_aio_read(iocb, iovp, segs, *offset);
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
 
@@ -335,16 +264,14 @@ xfs_sendfile(
 
 ssize_t
 xfs_splice_read(
-       bhv_desc_t              *bdp,
+       xfs_inode_t             *ip,
        struct file             *infilp,
        loff_t                  *ppos,
        struct pipe_inode_info  *pipe,
        size_t                  count,
        int                     flags,
-       int                     ioflags,
-       cred_t                  *credp)
+       int                     ioflags)
 {
-       xfs_inode_t             *ip = XFS_BHVTOI(bdp);
        xfs_mount_t             *mp = ip->i_mount;
        ssize_t                 ret;
 
@@ -354,20 +281,18 @@ xfs_splice_read(
 
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-       if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-           (!(ioflags & IO_INVIS))) {
-               bhv_vrwlock_t locktype = VRWLOCK_READ;
+       if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
+               int iolock = XFS_IOLOCK_SHARED;
                int error;
 
-               error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-                                       *ppos, count,
-                                       FILP_DELAY_FLAG(infilp), &locktype);
+               error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
+                                       FILP_DELAY_FLAG(infilp), &iolock);
                if (error) {
                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
                        return -error;
                }
        }
-       xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore,
+       xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip,
                           pipe, count, *ppos, ioflags);
        ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
        if (ret > 0)
@@ -379,18 +304,18 @@ xfs_splice_read(
 
 ssize_t
 xfs_splice_write(
-       bhv_desc_t              *bdp,
+       xfs_inode_t             *ip,
        struct pipe_inode_info  *pipe,
        struct file             *outfilp,
        loff_t                  *ppos,
        size_t                  count,
        int                     flags,
-       int                     ioflags,
-       cred_t                  *credp)
+       int                     ioflags)
 {
-       xfs_inode_t             *ip = XFS_BHVTOI(bdp);
        xfs_mount_t             *mp = ip->i_mount;
        ssize_t                 ret;
+       struct inode            *inode = outfilp->f_mapping->host;
+       xfs_fsize_t             isize, new_size;
 
        XFS_STATS_INC(xs_write_calls);
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -398,25 +323,49 @@ xfs_splice_write(
 
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
-       if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
-           (!(ioflags & IO_INVIS))) {
-               bhv_vrwlock_t locktype = VRWLOCK_WRITE;
+       if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
+               int iolock = XFS_IOLOCK_EXCL;
                int error;
 
-               error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
-                                       *ppos, count,
-                                       FILP_DELAY_FLAG(outfilp), &locktype);
+               error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
+                                       FILP_DELAY_FLAG(outfilp), &iolock);
                if (error) {
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return -error;
                }
        }
-       xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
+
+       new_size = *ppos + count;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip,
                           pipe, count, *ppos, ioflags);
        ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
        if (ret > 0)
                XFS_STATS_ADD(xs_write_bytes, ret);
 
+       isize = i_size_read(inode);
+       if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
+               *ppos = isize;
+
+       if (*ppos > ip->i_size) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               if (*ppos > ip->i_size)
+                       ip->i_size = *ppos;
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+
+       if (ip->i_new_size) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               ip->i_new_size = 0;
+               if (ip->i_d.di_size > ip->i_size)
+                       ip->i_d.di_size = ip->i_size;
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        return ret;
 }
@@ -429,21 +378,19 @@ xfs_splice_write(
  */
 STATIC int                             /* error (positive) */
 xfs_zero_last_block(
-       struct inode    *ip,
-       xfs_iocore_t    *io,
-       xfs_fsize_t     isize,
-       xfs_fsize_t     end_size)
+       xfs_inode_t     *ip,
+       xfs_fsize_t     offset,
+       xfs_fsize_t     isize)
 {
        xfs_fileoff_t   last_fsb;
-       xfs_mount_t     *mp = io->io_mount;
+       xfs_mount_t     *mp = ip->i_mount;
        int             nimaps;
        int             zero_offset;
        int             zero_len;
        int             error = 0;
        xfs_bmbt_irec_t imap;
-       loff_t          loff;
 
-       ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
        zero_offset = XFS_B_FSB_OFFSET(mp, isize);
        if (zero_offset == 0) {
@@ -456,7 +403,7 @@ xfs_zero_last_block(
 
        last_fsb = XFS_B_TO_FSBT(mp, isize);
        nimaps = 1;
-       error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
+       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
                          &nimaps, NULL, NULL);
        if (error) {
                return error;
@@ -474,13 +421,14 @@ xfs_zero_last_block(
         * out sync.  We need to drop the ilock while we do this so we
         * don't deadlock when the buffer cache calls back to us.
         */
-       XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-       loff = XFS_FSB_TO_B(mp, last_fsb);
        zero_len = mp->m_sb.sb_blocksize - zero_offset;
-       error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
+       if (isize + zero_len > offset)
+               zero_len = offset - isize;
+       error = xfs_iozero(ip, isize, zero_len);
 
-       XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
        ASSERT(error >= 0);
        return error;
 }
@@ -498,34 +446,31 @@ xfs_zero_last_block(
 
 int                                    /* error (positive) */
 xfs_zero_eof(
-       bhv_vnode_t     *vp,
-       xfs_iocore_t    *io,
+       xfs_inode_t     *ip,
        xfs_off_t       offset,         /* starting I/O offset */
-       xfs_fsize_t     isize,          /* current inode size */
-       xfs_fsize_t     end_size)       /* terminal inode size */
+       xfs_fsize_t     isize)          /* current inode size */
 {
-       struct inode    *ip = vn_to_inode(vp);
+       xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   start_zero_fsb;
        xfs_fileoff_t   end_zero_fsb;
        xfs_fileoff_t   zero_count_fsb;
        xfs_fileoff_t   last_fsb;
-       xfs_mount_t     *mp = io->io_mount;
+       xfs_fileoff_t   zero_off;
+       xfs_fsize_t     zero_len;
        int             nimaps;
        int             error = 0;
        xfs_bmbt_irec_t imap;
 
-       ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
-       ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
        ASSERT(offset > isize);
 
        /*
         * First handle zeroing the block on which isize resides.
         * We only zero a part of that block so it is handled specially.
         */
-       error = xfs_zero_last_block(ip, io, isize, end_size);
+       error = xfs_zero_last_block(ip, offset, isize);
        if (error) {
-               ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
-               ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
                return error;
        }
 
@@ -553,11 +498,10 @@ xfs_zero_eof(
        while (start_zero_fsb <= end_zero_fsb) {
                nimaps = 1;
                zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-               error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
+               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
                                  0, NULL, 0, &imap, &nimaps, NULL, NULL);
                if (error) {
-                       ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
-                       ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
                        return error;
                }
                ASSERT(nimaps > 0);
@@ -581,12 +525,15 @@ xfs_zero_eof(
                 * Drop the inode lock while we're doing the I/O.
                 * We'll still have the iolock to protect us.
                 */
-               XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+               if ((zero_off + zero_len) > offset)
+                       zero_len = offset - zero_off;
 
-               error = xfs_iozero(ip,
-                                  XFS_FSB_TO_B(mp, start_zero_fsb),
-                                  XFS_FSB_TO_B(mp, imap.br_blockcount),
-                                  end_size);
+               error = xfs_iozero(ip, zero_off, zero_len);
                if (error) {
                        goto out_lock;
                }
@@ -594,69 +541,44 @@ xfs_zero_eof(
                start_zero_fsb = imap.br_startoff + imap.br_blockcount;
                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 
-               XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
        }
 
        return 0;
 
 out_lock:
-
-       XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
        ASSERT(error >= 0);
        return error;
 }
 
 ssize_t                                /* bytes written, or (-) error */
 xfs_write(
-       bhv_desc_t              *bdp,
+       struct xfs_inode        *xip,
        struct kiocb            *iocb,
        const struct iovec      *iovp,
        unsigned int            nsegs,
        loff_t                  *offset,
-       int                     ioflags,
-       cred_t                  *credp)
+       int                     ioflags)
 {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        struct inode            *inode = mapping->host;
        unsigned long           segs = nsegs;
-       xfs_inode_t             *xip;
        xfs_mount_t             *mp;
        ssize_t                 ret = 0, error = 0;
        xfs_fsize_t             isize, new_size;
-       xfs_iocore_t            *io;
-       bhv_vnode_t             *vp;
-       unsigned long           seg;
        int                     iolock;
        int                     eventsent = 0;
-       bhv_vrwlock_t           locktype;
        size_t                  ocount = 0, count;
        loff_t                  pos;
-       int                     need_i_mutex = 1, need_flush = 0;
+       int                     need_i_mutex;
 
        XFS_STATS_INC(xs_write_calls);
 
-       vp = BHV_TO_VNODE(bdp);
-       xip = XFS_BHVTOI(bdp);
-
-       for (seg = 0; seg < segs; seg++) {
-               const struct iovec *iv = &iovp[seg];
-
-               /*
-                * If any segment has a negative length, or the cumulative
-                * length ever wraps negative then return -EINVAL.
-                */
-               ocount += iv->iov_len;
-               if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
-                       return -EINVAL;
-               if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
-                       continue;
-               if (seg == 0)
-                       return -EFAULT;
-               segs = seg;
-               ocount -= iv->iov_len;  /* This segment is no good */
-               break;
-       }
+       error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
+       if (error)
+               return error;
 
        count = ocount;
        pos = *offset;
@@ -664,47 +586,25 @@ xfs_write(
        if (count == 0)
                return 0;
 
-       io = &xip->i_iocore;
-       mp = io->io_mount;
+       mp = xip->i_mount;
 
-       vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE);
+       xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       if (ioflags & IO_ISDIRECT) {
-               xfs_buftarg_t   *target =
-                       (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp;
-
-               if ((pos & target->bt_smask) || (count & target->bt_smask))
-                       return XFS_ERROR(-EINVAL);
-
-               if (!VN_CACHED(vp) && pos < i_size_read(inode))
-                       need_i_mutex = 0;
-
-               if (VN_CACHED(vp))
-                       need_flush = 1;
-       }
-
 relock:
-       if (need_i_mutex) {
+       if (ioflags & IO_ISDIRECT) {
+               iolock = XFS_IOLOCK_SHARED;
+               need_i_mutex = 0;
+       } else {
                iolock = XFS_IOLOCK_EXCL;
-               locktype = VRWLOCK_WRITE;
-
+               need_i_mutex = 1;
                mutex_lock(&inode->i_mutex);
-       } else {
-               iolock = XFS_IOLOCK_SHARED;
-               locktype = VRWLOCK_WRITE_DIRECT;
        }
 
        xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
 
-       isize = i_size_read(inode);
-
-       if (file->f_flags & O_APPEND)
-               *offset = isize;
-
 start:
        error = -generic_write_checks(file, &pos, &count,
                                        S_ISBLK(inode->i_mode));
@@ -713,25 +613,18 @@ start:
                goto out_unlock_mutex;
        }
 
-       new_size = pos + count;
-       if (new_size > isize)
-               io->io_new_size = new_size;
-
-       if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
+       if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
            !(ioflags & IO_INVIS) && !eventsent)) {
-               loff_t          savedsize = pos;
                int             dmflags = FILP_DELAY_FLAG(file);
 
                if (need_i_mutex)
                        dmflags |= DM_FLAGS_IMUX;
 
                xfs_iunlock(xip, XFS_ILOCK_EXCL);
-               error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
-                                     pos, count,
-                                     dmflags, &locktype);
+               error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
+                                     pos, count, dmflags, &iolock);
                if (error) {
-                       xfs_iunlock(xip, iolock);
-                       goto out_unlock_mutex;
+                       goto out_unlock_internal;
                }
                xfs_ilock(xip, XFS_ILOCK_EXCL);
                eventsent = 1;
@@ -743,17 +636,36 @@ start:
                 * event prevents another call to XFS_SEND_DATA, which is
                 * what allows the size to change in the first place.
                 */
-               if ((file->f_flags & O_APPEND) && savedsize != isize) {
-                       pos = isize = xip->i_d.di_size;
+               if ((file->f_flags & O_APPEND) && pos != xip->i_size)
+                       goto start;
+       }
+
+       if (ioflags & IO_ISDIRECT) {
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(xip) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               if ((pos & target->bt_smask) || (count & target->bt_smask)) {
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                       return XFS_ERROR(-EINVAL);
+               }
+
+               if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                       iolock = XFS_IOLOCK_EXCL;
+                       need_i_mutex = 1;
+                       mutex_lock(&inode->i_mutex);
+                       xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
                        goto start;
                }
        }
 
-       if (likely(!(ioflags & IO_INVIS))) {
+       new_size = pos + count;
+       if (new_size > xip->i_size)
+               xip->i_new_size = new_size;
+
+       if (likely(!(ioflags & IO_INVIS)))
                file_update_time(file);
-               xfs_ichgtime_fast(xip, inode,
-                                 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       }
 
        /*
         * If the offset is beyond the size of the file, we have a couple
@@ -764,12 +676,11 @@ start:
         * to zero it out up to the new size.
         */
 
-       if (pos > isize) {
-               error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
-                                       isize, pos + count);
+       if (pos > xip->i_size) {
+               error = xfs_zero_eof(xip, pos, xip->i_size);
                if (error) {
-                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-                       goto out_unlock_mutex;
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL);
+                       goto out_unlock_internal;
                }
        }
        xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -787,36 +698,37 @@ start:
             !capable(CAP_FSETID)) {
                error = xfs_write_clear_setuid(xip);
                if (likely(!error))
-                       error = -remove_suid(file->f_dentry);
+                       error = -file_remove_suid(file);
                if (unlikely(error)) {
-                       xfs_iunlock(xip, iolock);
-                       goto out_unlock_mutex;
+                       goto out_unlock_internal;
                }
        }
 
-retry:
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
 
        if ((ioflags & IO_ISDIRECT)) {
-               if (need_flush) {
-                       xfs_inval_cached_trace(io, pos, -1,
-                                       ctooff(offtoct(pos)), -1);
-                       bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
+               if (mapping->nrpages) {
+                       WARN_ON(need_i_mutex == 0);
+                       xfs_inval_cached_trace(xip, pos, -1,
+                                       (pos & PAGE_CACHE_MASK), -1);
+                       error = xfs_flushinval_pages(xip,
+                                       (pos & PAGE_CACHE_MASK),
                                        -1, FI_REMAPF_LOCKED);
+                       if (error)
+                               goto out_unlock_internal;
                }
 
                if (need_i_mutex) {
                        /* demote the lock now the cached pages are gone */
-                       XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
+                       xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
                        mutex_unlock(&inode->i_mutex);
 
                        iolock = XFS_IOLOCK_SHARED;
-                       locktype = VRWLOCK_WRITE_DIRECT;
                        need_i_mutex = 0;
                }
 
-               xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
+               xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs,
                                *offset, ioflags);
                ret = generic_file_direct_write(iocb, iovp,
                                &segs, pos, offset, count, ocount);
@@ -831,56 +743,60 @@ retry:
                        pos += ret;
                        count -= ret;
 
-                       need_i_mutex = 1;
                        ioflags &= ~IO_ISDIRECT;
                        xfs_iunlock(xip, iolock);
                        goto relock;
                }
        } else {
-               xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
+               int enospc = 0;
+               ssize_t ret2 = 0;
+
+write_retry:
+               xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
                                *offset, ioflags);
-               ret = generic_file_buffered_write(iocb, iovp, segs,
+               ret2 = generic_file_buffered_write(iocb, iovp, segs,
                                pos, offset, count, ret);
+               /*
+                * if we just got an ENOSPC, flush the inode now we
+                * aren't holding any page locks and retry *once*
+                */
+               if (ret2 == -ENOSPC && !enospc) {
+                       error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
+                       if (error)
+                               goto out_unlock_internal;
+                       enospc = 1;
+                       goto write_retry;
+               }
+               ret = ret2;
        }
 
        current->backing_dev_info = NULL;
 
-       if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
-               ret = wait_on_sync_kiocb(iocb);
+       isize = i_size_read(inode);
+       if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+               *offset = isize;
 
-       if ((ret == -ENOSPC) &&
-           DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
-           !(ioflags & IO_INVIS)) {
+       if (*offset > xip->i_size) {
+               xfs_ilock(xip, XFS_ILOCK_EXCL);
+               if (*offset > xip->i_size)
+                       xip->i_size = *offset;
+               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+       }
 
-               xfs_rwunlock(bdp, locktype);
+       if (ret == -ENOSPC &&
+           DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
+               xfs_iunlock(xip, iolock);
                if (need_i_mutex)
                        mutex_unlock(&inode->i_mutex);
-               error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
-                               DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
+               error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
+                               DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
                                0, 0, 0); /* Delay flag intentionally  unused */
-               if (error)
-                       goto out_nounlocks;
                if (need_i_mutex)
                        mutex_lock(&inode->i_mutex);
-               xfs_rwlock(bdp, locktype);
-               pos = xip->i_d.di_size;
-               ret = 0;
-               goto retry;
-       }
-
-       isize = i_size_read(inode);
-       if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
-               *offset = isize;
-
-       if (*offset > xip->i_d.di_size) {
-               xfs_ilock(xip, XFS_ILOCK_EXCL);
-               if (*offset > xip->i_d.di_size) {
-                       xip->i_d.di_size = *offset;
-                       i_size_write(inode, *offset);
-                       xip->i_update_core = 1;
-                       xip->i_update_size = 1;
-               }
-               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+               xfs_ilock(xip, iolock);
+               if (error)
+                       goto out_unlock_internal;
+               goto start;
        }
 
        error = -ret;
@@ -890,27 +806,45 @@ retry:
        XFS_STATS_ADD(xs_write_bytes, ret);
 
        /* Handle various SYNC-type writes */
-       if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
-               error = xfs_write_sync_logforce(mp, xip);
-               if (error)
-                       goto out_unlock_internal;
+       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+               loff_t end = pos + ret - 1;
+               int error2;
 
-               xfs_rwunlock(bdp, locktype);
+               xfs_iunlock(xip, iolock);
                if (need_i_mutex)
                        mutex_unlock(&inode->i_mutex);
 
-               error = sync_page_range(inode, mapping, pos, ret);
+               error2 = filemap_write_and_wait_range(mapping, pos, end);
                if (!error)
-                       error = ret;
-               return error;
+                       error = error2;
+               if (need_i_mutex)
+                       mutex_lock(&inode->i_mutex);
+               xfs_ilock(xip, iolock);
+
+               error2 = xfs_fsync(xip);
+               if (!error)
+                       error = error2;
        }
 
  out_unlock_internal:
-       xfs_rwunlock(bdp, locktype);
+       if (xip->i_new_size) {
+               xfs_ilock(xip, XFS_ILOCK_EXCL);
+               xip->i_new_size = 0;
+               /*
+                * If this was a direct or synchronous I/O that failed (such
+                * as ENOSPC) then part of the I/O may have been written to
+                * disk before the error occured.  In this case the on-disk
+                * file size may have been adjusted beyond the in-memory file
+                * size and now needs to be truncated back.
+                */
+               if (xip->i_d.di_size > xip->i_size)
+                       xip->i_d.di_size = xip->i_size;
+               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+       }
+       xfs_iunlock(xip, iolock);
  out_unlock_mutex:
        if (need_i_mutex)
                mutex_unlock(&inode->i_mutex);
- out_nounlocks:
        return -error;
 }
 
@@ -923,13 +857,7 @@ retry:
 int
 xfs_bdstrat_cb(struct xfs_buf *bp)
 {
-       xfs_mount_t     *mp;
-
-       mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
-       if (!XFS_FORCED_SHUTDOWN(mp)) {
-               xfs_buf_iorequest(bp);
-               return 0;
-       } else {
+       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
                xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
                /*
                 * Metadata write that didn't get logged but
@@ -942,50 +870,29 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
                else
                        return (xfs_bioerror(bp));
        }
-}
-
 
-int
-xfs_bmap(bhv_desc_t    *bdp,
-       xfs_off_t       offset,
-       ssize_t         count,
-       int             flags,
-       xfs_iomap_t     *iomapp,
-       int             *niomaps)
-{
-       xfs_inode_t     *ip = XFS_BHVTOI(bdp);
-       xfs_iocore_t    *io = &ip->i_iocore;
-
-       ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-       ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
-              ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
-
-       return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
+       xfs_buf_iorequest(bp);
+       return 0;
 }
 
 /*
- * Wrapper around bdstrat so that we can stop data
- * from going to disk in case we are shutting down the filesystem.
- * Typically user data goes thru this path; one of the exceptions
- * is the superblock.
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
  */
-int
+void
 xfsbdstrat(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp)
 {
        ASSERT(mp);
        if (!XFS_FORCED_SHUTDOWN(mp)) {
-               /* Grio redirection would go here
-                * if (XFS_BUF_IS_GRIO(bp)) {
-                */
-
                xfs_buf_iorequest(bp);
-               return 0;
+               return;
        }
 
        xfs_buftrace("XFSBDSTRAT IOERROR", bp);
-       return (xfs_bioerror_relse(bp));
+       xfs_bioerror_relse(bp);
 }
 
 /*