exec: replace call_usermodehelper_pipe with use of umh init function and resolve...

[safe/jmp/linux-2.6] / fs / xfs / xfs_iomap.c
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index ab50621..ef14943 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,199 +42,74 @@
  #include "xfs_error.h"
  #include "xfs_itable.h"
  #include "xfs_rw.h"
-#include "xfs_acl.h"
  #include "xfs_attr.h"
  #include "xfs_buf_item.h"
  #include "xfs_trans_space.h"
  #include "xfs_utils.h"
  #include "xfs_iomap.h"
+#include "xfs_trace.h"
  
-#if defined(XFS_RW_TRACE)
-void
-xfs_iomap_enter_trace(
-       int             tag,
-       xfs_iocore_t    *io,
-       xfs_off_t       offset,
-       ssize_t         count)
-{
-       xfs_inode_t     *ip = XFS_IO_INODE(io);
-
-       if (!ip->i_rwtrace)
-               return;
-
-       ktrace_enter(ip->i_rwtrace,
-               (void *)((unsigned long)tag),
-               (void *)ip,
-               (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
-               (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(offset & 0xffffffff)),
-               (void *)((unsigned long)count),
-               (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(io->io_new_size & 0xffffffff)),
-               (void *)((unsigned long)current_pid()),
-               (void *)NULL,
-               (void *)NULL,
-               (void *)NULL,
-               (void *)NULL,
-               (void *)NULL,
-               (void *)NULL);
-}
-
-void
-xfs_iomap_map_trace(
-       int             tag,
-       xfs_iocore_t    *io,
-       xfs_off_t       offset,
-       ssize_t         count,
-       xfs_iomap_t     *iomapp,
-       xfs_bmbt_irec_t *imapp,
-       int             flags)
-{
-       xfs_inode_t     *ip = XFS_IO_INODE(io);
-
-       if (!ip->i_rwtrace)
-               return;
-
-       ktrace_enter(ip->i_rwtrace,
-               (void *)((unsigned long)tag),
-               (void *)ip,
-               (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
-               (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(offset & 0xffffffff)),
-               (void *)((unsigned long)count),
-               (void *)((unsigned long)flags),
-               (void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)),
-               (void *)((unsigned long)(iomapp->iomap_delta)),
-               (void *)((unsigned long)(iomapp->iomap_bsize)),
-               (void *)((unsigned long)(iomapp->iomap_bn)),
-               (void *)(__psint_t)(imapp->br_startoff),
-               (void *)((unsigned long)(imapp->br_blockcount)),
-               (void *)(__psint_t)(imapp->br_startblock));
-}
-#else
-#define xfs_iomap_enter_trace(tag, io, offset, count)
-#define xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags)
-#endif
  
  #define XFS_WRITEIO_ALIGN(mp,off)      (((off) >> mp->m_writeio_log) \
                                                 << mp->m_writeio_log)
  #define XFS_STRAT_WRITE_IMAPS  2
  #define XFS_WRITE_IMAPS                XFS_BMAP_MAX_NMAP
  
-STATIC int
-xfs_imap_to_bmap(
-       xfs_iocore_t    *io,
-       xfs_off_t       offset,
-       xfs_bmbt_irec_t *imap,
-       xfs_iomap_t     *iomapp,
-       int             imaps,                  /* Number of imap entries */
-       int             iomaps,                 /* Number of iomap entries */
-       int             flags)
-{
-       xfs_mount_t     *mp;
-       xfs_fsize_t     nisize;
-       int             pbm;
-       xfs_fsblock_t   start_block;
-
-       mp = io->io_mount;
-       nisize = XFS_SIZE(mp, io);
-       if (io->io_new_size > nisize)
-               nisize = io->io_new_size;
-
-       for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
-               iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-               iomapp->iomap_delta = offset - iomapp->iomap_offset;
-               iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
-               iomapp->iomap_flags = flags;
-
-               if (io->io_flags & XFS_IOCORE_RT) {
-                       iomapp->iomap_flags |= IOMAP_REALTIME;
-                       iomapp->iomap_target = mp->m_rtdev_targp;
-               } else {
-                       iomapp->iomap_target = mp->m_ddev_targp;
-               }
-               start_block = imap->br_startblock;
-               if (start_block == HOLESTARTBLOCK) {
-                       iomapp->iomap_bn = IOMAP_DADDR_NULL;
-                       iomapp->iomap_flags |= IOMAP_HOLE;
-               } else if (start_block == DELAYSTARTBLOCK) {
-                       iomapp->iomap_bn = IOMAP_DADDR_NULL;
-                       iomapp->iomap_flags |= IOMAP_DELAY;
-               } else {
-                       iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block);
-                       if (ISUNWRITTEN(imap))
-                               iomapp->iomap_flags |= IOMAP_UNWRITTEN;
-               }
-
-               if ((iomapp->iomap_offset + iomapp->iomap_bsize) >= nisize) {
-                       iomapp->iomap_flags |= IOMAP_EOF;
-               }
-
-               offset += iomapp->iomap_bsize - iomapp->iomap_delta;
-       }
-       return pbm;     /* Return the number filled */
-}
+STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
+                                 int, struct xfs_bmbt_irec *, int *);
+STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
+                                struct xfs_bmbt_irec *, int *);
+STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
+                               struct xfs_bmbt_irec *, int *);
  
  int
  xfs_iomap(
-       xfs_iocore_t    *io,
-       xfs_off_t       offset,
-       ssize_t         count,
-       int             flags,
-       xfs_iomap_t     *iomapp,
-       int             *niomaps)
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       ssize_t                 count,
+       int                     flags,
+       struct xfs_bmbt_irec    *imap,
+       int                     *nimaps,
+       int                     *new)
  {
-       xfs_mount_t     *mp = io->io_mount;
-       xfs_fileoff_t   offset_fsb, end_fsb;
-       int             error = 0;
-       int             lockmode = 0;
-       xfs_bmbt_irec_t imap;
-       int             nimaps = 1;
-       int             bmapi_flags = 0;
-       int             iomap_flags = 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     lockmode = 0;
+       int                     bmapi_flags = 0;
+
+       ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
+
+       *new = 0;
  
         if (XFS_FORCED_SHUTDOWN(mp))
                 return XFS_ERROR(EIO);
  
-       switch (flags &
-               (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE |
-                BMAPI_UNWRITTEN | BMAPI_DEVICE)) {
+       trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
+
+       switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
         case BMAPI_READ:
-               xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
-               lockmode = XFS_LCK_MAP_SHARED(mp, io);
+               lockmode = xfs_ilock_map_shared(ip);
                 bmapi_flags = XFS_BMAPI_ENTIRE;
                 break;
         case BMAPI_WRITE:
-               xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
-               lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
+               lockmode = XFS_ILOCK_EXCL;
                 if (flags & BMAPI_IGNSTATE)
                         bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
-               XFS_ILOCK(mp, io, lockmode);
+               xfs_ilock(ip, lockmode);
                 break;
         case BMAPI_ALLOCATE:
-               xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, io, offset, count);
-               lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
+               lockmode = XFS_ILOCK_SHARED;
                 bmapi_flags = XFS_BMAPI_ENTIRE;
+
                 /* Attempt non-blocking lock */
                 if (flags & BMAPI_TRYLOCK) {
-                       if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
+                       if (!xfs_ilock_nowait(ip, lockmode))
                                 return XFS_ERROR(EAGAIN);
                 } else {
-                       XFS_ILOCK(mp, io, lockmode);
+                       xfs_ilock(ip, lockmode);
                 }
                 break;
-       case BMAPI_UNWRITTEN:
-               goto phase2;
-       case BMAPI_DEVICE:
-               lockmode = XFS_LCK_MAP_SHARED(mp, io);
-               iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ?
-                       mp->m_rtdev_targp : mp->m_ddev_targp;
-               error = 0;
-               *niomaps = 1;
-               goto out;
         default:
                 BUG();
         }
@@ -245,78 +120,63 @@ xfs_iomap(
         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
         offset_fsb = XFS_B_TO_FSBT(mp, offset);
  
-       error = XFS_BMAPI(mp, NULL, io, offset_fsb,
+       error = xfs_bmapi(NULL, ip, offset_fsb,
                         (xfs_filblks_t)(end_fsb - offset_fsb),
-                       bmapi_flags,  NULL, 0, &imap,
-                       &nimaps, NULL, NULL);
+                       bmapi_flags,  NULL, 0, imap,
+                       nimaps, NULL, NULL);
  
         if (error)
                 goto out;
  
-phase2:
-       switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) {
+       switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
         case BMAPI_WRITE:
                 /* If we found an extent, return it */
-               if (nimaps &&
-                   (imap.br_startblock != HOLESTARTBLOCK) &&
-                   (imap.br_startblock != DELAYSTARTBLOCK)) {
-                       xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
-                                       offset, count, iomapp, &imap, flags);
+               if (*nimaps &&
+                   (imap->br_startblock != HOLESTARTBLOCK) &&
+                   (imap->br_startblock != DELAYSTARTBLOCK)) {
+                       trace_xfs_iomap_found(ip, offset, count, flags, imap);
                         break;
                 }
  
                 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
-                       error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset,
-                                       count, flags, &imap, &nimaps, nimaps);
+                       error = xfs_iomap_write_direct(ip, offset, count, flags,
+                                                      imap, nimaps);
                 } else {
-                       error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count,
-                                       flags, &imap, &nimaps);
+                       error = xfs_iomap_write_delay(ip, offset, count, flags,
+                                                     imap, nimaps);
                 }
                 if (!error) {
-                       xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, io,
-                                       offset, count, iomapp, &imap, flags);
+                       trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
                 }
-               iomap_flags = IOMAP_NEW;
+               *new = 1;
                 break;
         case BMAPI_ALLOCATE:
                 /* If we found an extent, return it */
-               XFS_IUNLOCK(mp, io, lockmode);
+               xfs_iunlock(ip, lockmode);
                 lockmode = 0;
  
-               if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) {
-                       xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
-                                       offset, count, iomapp, &imap, flags);
+               if (*nimaps && !isnullstartblock(imap->br_startblock)) {
+                       trace_xfs_iomap_found(ip, offset, count, flags, imap);
                         break;
                 }
  
-               error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count,
-                                                &imap, &nimaps);
-               break;
-       case BMAPI_UNWRITTEN:
-               lockmode = 0;
-               error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count);
-               nimaps = 0;
+               error = xfs_iomap_write_allocate(ip, offset, count,
+                                                imap, nimaps);
                 break;
         }
  
-       if (nimaps) {
-               *niomaps = xfs_imap_to_bmap(io, offset, &imap,
-                                           iomapp, nimaps, *niomaps, iomap_flags);
-       } else if (niomaps) {
-               *niomaps = 0;
-       }
+       ASSERT(*nimaps <= 1);
  
  out:
         if (lockmode)
-               XFS_IUNLOCK(mp, io, lockmode);
+               xfs_iunlock(ip, lockmode);
         return XFS_ERROR(error);
  }
  
  STATIC int
  xfs_iomap_eof_align_last_fsb(
         xfs_mount_t     *mp,
-       xfs_iocore_t    *io,
-       xfs_fsize_t     isize,
+       xfs_inode_t     *ip,
         xfs_extlen_t    extsize,
         xfs_fileoff_t   *last_fsb)
  {
@@ -324,7 +184,7 @@ xfs_iomap_eof_align_last_fsb(
         xfs_extlen_t    align;
         int             eof, error;
  
-       if (io->io_flags & XFS_IOCORE_RT)
+       if (XFS_IS_REALTIME_INODE(ip))
                 ;
         /*
          * If mounted with the "-o swalloc" option, roundup the allocation
@@ -332,14 +192,14 @@ xfs_iomap_eof_align_last_fsb(
          * stripe width and we are allocating past the allocation eof.
          */
         else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
-               (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+               (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
                 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
         /*
          * Roundup the allocation request to a stripe unit (m_dalign) boundary
          * if the file size is >= stripe unit size, and we are allocating past
          * the allocation eof.
          */
-       else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+       else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
                 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
  
         /*
@@ -355,7 +215,7 @@ xfs_iomap_eof_align_last_fsb(
         }
  
         if (new_last_fsb) {
-               error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+               error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
                 if (error)
                         return error;
                 if (eof)
@@ -365,38 +225,6 @@ xfs_iomap_eof_align_last_fsb(
  }
  
  STATIC int
-xfs_flush_space(
-       xfs_inode_t     *ip,
-       int             *fsynced,
-       int             *ioflags)
-{
-       switch (*fsynced) {
-       case 0:
-               if (ip->i_delayed_blks) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       xfs_flush_inode(ip);
-                       xfs_ilock(ip, XFS_ILOCK_EXCL);
-                       *fsynced = 1;
-               } else {
-                       *ioflags |= BMAPI_SYNC;
-                       *fsynced = 2;
-               }
-               return 0;
-       case 1:
-               *fsynced = 2;
-               *ioflags |= BMAPI_SYNC;
-               return 0;
-       case 2:
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               xfs_flush_device(ip);
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               *fsynced = 3;
-               return 0;
-       }
-       return 1;
-}
-
-STATIC int
  xfs_cmn_err_fsblock_zero(
         xfs_inode_t     *ip,
         xfs_bmbt_irec_t *imap)
@@ -413,24 +241,21 @@ xfs_cmn_err_fsblock_zero(
         return EFSCORRUPTED;
  }
  
-int
+STATIC int
  xfs_iomap_write_direct(
         xfs_inode_t     *ip,
         xfs_off_t       offset,
         size_t          count,
         int             flags,
         xfs_bmbt_irec_t *ret_imap,
-       int             *nmaps,
-       int             found)
+       int             *nmaps)
  {
         xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
         xfs_fileoff_t   offset_fsb;
         xfs_fileoff_t   last_fsb;
         xfs_filblks_t   count_fsb, resaligned;
         xfs_fsblock_t   firstfsb;
         xfs_extlen_t    extsz, temp;
-       xfs_fsize_t     isize;
         int             nimaps;
         int             bmapi_flag;
         int             quota_flag;
@@ -446,31 +271,21 @@ xfs_iomap_write_direct(
          * Make sure that the dquots are there. This doesn't hold
          * the ilock across a disk read.
          */
-       error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+       error = xfs_qm_dqattach_locked(ip, 0);
         if (error)
                 return XFS_ERROR(error);
  
         rt = XFS_IS_REALTIME_INODE(ip);
-       if (unlikely(rt)) {
-               if (!(extsz = ip->i_d.di_extsize))
-                       extsz = mp->m_sb.sb_rextsize;
-       } else {
-               extsz = ip->i_d.di_extsize;
-       }
-
-       isize = ip->i_size;
-       if (io->io_new_size > isize)
-               isize = io->io_new_size;
+       extsz = xfs_get_extsz_hint(ip);
  
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-       if ((offset + count) > isize) {
-               error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
-                                                       &last_fsb);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+       if ((offset + count) > ip->i_size) {
+               error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
                 if (error)
                         goto error_out;
         } else {
-               if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
+               if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK))
                         last_fsb = MIN(last_fsb, (xfs_fileoff_t)
                                         ret_imap->br_blockcount +
                                         ret_imap->br_startoff);
@@ -515,8 +330,7 @@ xfs_iomap_write_direct(
         if (error)
                 goto error_out;
  
-       error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-                                             qblocks, 0, quota_flag);
+       error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
         if (error)
                 goto error1;
  
@@ -530,9 +344,9 @@ xfs_iomap_write_direct(
         /*
          * Issue the xfs_bmapi() call to allocate the blocks
          */
-       XFS_BMAP_INIT(&free_list, &firstfsb);
+       xfs_bmap_init(&free_list, &firstfsb);
         nimaps = 1;
-       error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag,
+       error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
                 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
         if (error)
                 goto error0;
@@ -555,7 +369,7 @@ xfs_iomap_write_direct(
                 goto error_out;
         }
  
-       if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) {
+       if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
                 error = xfs_cmn_err_fsblock_zero(ip, &imap);
                 goto error_out;
         }
@@ -566,7 +380,7 @@ xfs_iomap_write_direct(
  
  error0:        /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
         xfs_bmap_cancel(&free_list);
-       XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+       xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
  
  error1:        /* Just cancel transaction */
         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -577,21 +391,14 @@ error_out:
  }
  
  /*
- * If the caller is doing a write at the end of the file,
- * then extend the allocation out to the file system's write
- * iosize.  We clean up any extra space left over when the
- * file is closed in xfs_inactive().
- *
- * For sync writes, we are flushing delayed allocate space to
- * try to make additional space available for allocation near
- * the filesystem full boundary - preallocation hurts in that
- * situation, of course.
+ * If the caller is doing a write at the end of the file, then extend the
+ * allocation out to the file system's write iosize.  We clean up any extra
+ * space left over when the file is closed in xfs_inactive().
   */
  STATIC int
  xfs_iomap_eof_want_preallocate(
         xfs_mount_t     *mp,
-       xfs_iocore_t    *io,
-       xfs_fsize_t     isize,
+       xfs_inode_t     *ip,
         xfs_off_t       offset,
         size_t          count,
         int             ioflag,
@@ -605,7 +412,7 @@ xfs_iomap_eof_want_preallocate(
         int             n, error, imaps;
  
         *prealloc = 0;
-       if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+       if ((offset + count) <= ip->i_size)
                 return 0;
  
         /*
@@ -617,7 +424,7 @@ xfs_iomap_eof_want_preallocate(
         while (count_fsb > 0) {
                 imaps = nimaps;
                 firstblock = NULLFSBLOCK;
-               error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0,
+               error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
                                   &firstblock, 0, imap, &imaps, NULL, NULL);
                 if (error)
                         return error;
@@ -633,7 +440,7 @@ xfs_iomap_eof_want_preallocate(
         return 0;
  }
  
-int
+STATIC int
  xfs_iomap_write_delay(
         xfs_inode_t     *ip,
         xfs_off_t       offset,
@@ -643,48 +450,36 @@ xfs_iomap_write_delay(
         int             *nmaps)
  {
         xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
         xfs_fileoff_t   offset_fsb;
         xfs_fileoff_t   last_fsb;
         xfs_off_t       aligned_offset;
         xfs_fileoff_t   ioalign;
         xfs_fsblock_t   firstblock;
         xfs_extlen_t    extsz;
-       xfs_fsize_t     isize;
         int             nimaps;
         xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-       int             prealloc, fsynced = 0;
+       int             prealloc, flushed = 0;
         int             error;
  
-       ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  
         /*
          * Make sure that the dquots are there. This doesn't hold
          * the ilock across a disk read.
          */
-       error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+       error = xfs_qm_dqattach_locked(ip, 0);
         if (error)
                 return XFS_ERROR(error);
  
-       if (XFS_IS_REALTIME_INODE(ip)) {
-               if (!(extsz = ip->i_d.di_extsize))
-                       extsz = mp->m_sb.sb_rextsize;
-       } else {
-               extsz = ip->i_d.di_extsize;
-       }
-
+       extsz = xfs_get_extsz_hint(ip);
         offset_fsb = XFS_B_TO_FSBT(mp, offset);
  
-retry:
-       isize = ip->i_size;
-       if (io->io_new_size > isize)
-               isize = io->io_new_size;
-
-       error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+       error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
                                 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
         if (error)
                 return error;
  
+retry:
         if (prealloc) {
                 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
                 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
@@ -694,15 +489,14 @@ retry:
         }
  
         if (prealloc || extsz) {
-               error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
-                                                       &last_fsb);
+               error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
                 if (error)
                         return error;
         }
  
         nimaps = XFS_WRITE_IMAPS;
         firstblock = NULLFSBLOCK;
-       error = XFS_BMAPI(mp, NULL, io, offset_fsb,
+       error = xfs_bmapi(NULL, ip, offset_fsb,
                           (xfs_filblks_t)(last_fsb - offset_fsb),
                           XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
                           XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
@@ -712,19 +506,25 @@ retry:
  
         /*
          * If bmapi returned us nothing, and if we didn't get back EDQUOT,
-        * then we must have run out of space - flush delalloc, and retry..
+        * then we must have run out of space - flush all other inodes with
+        * delalloc blocks and retry without EOF preallocation.
          */
         if (nimaps == 0) {
-               xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
-                                       io, offset, count);
-               if (xfs_flush_space(ip, &fsynced, &ioflag))
+               trace_xfs_delalloc_enospc(ip, offset, count);
+               if (flushed)
                         return XFS_ERROR(ENOSPC);
  
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+               xfs_flush_inodes(ip);
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               flushed = 1;
                 error = 0;
+               prealloc = 0;
                 goto retry;
         }
  
-       if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
+       if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
                 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
  
         *ret_imap = imap[0];
@@ -739,8 +539,11 @@ retry:
   * the originating callers request.
   *
   * Called without a lock on the inode.
+ *
+ * We no longer bother to look at the incoming map - all we have to
+ * guarantee is that whatever we allocate fills the required range.
   */
-int
+STATIC int
  xfs_iomap_write_allocate(
         xfs_inode_t     *ip,
         xfs_off_t       offset,
@@ -749,15 +552,14 @@ xfs_iomap_write_allocate(
         int             *retmap)
  {
         xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
         xfs_fileoff_t   offset_fsb, last_block;
         xfs_fileoff_t   end_fsb, map_start_fsb;
         xfs_fsblock_t   first_block;
         xfs_bmap_free_t free_list;
         xfs_filblks_t   count_fsb;
-       xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS];
+       xfs_bmbt_irec_t imap;
         xfs_trans_t     *tp;
-       int             i, nimaps, committed;
+       int             nimaps, committed;
         int             error = 0;
         int             nres;
  
@@ -766,7 +568,8 @@ xfs_iomap_write_allocate(
         /*
          * Make sure that the dquots are there.
          */
-       if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+       error = xfs_qm_dqattach(ip, 0);
+       if (error)
                 return XFS_ERROR(error);
  
         offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -802,18 +605,46 @@ xfs_iomap_write_allocate(
                         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
                         xfs_trans_ihold(tp, ip);
  
-                       XFS_BMAP_INIT(&free_list, &first_block);
+                       xfs_bmap_init(&free_list, &first_block);
  
-                       nimaps = XFS_STRAT_WRITE_IMAPS;
                         /*
-                        * Ensure we don't go beyond eof - it is possible
-                        * the extents changed since we did the read call,
-                        * we dropped the ilock in the interim.
+                        * it is possible that the extents have changed since
+                        * we did the read call as we dropped the ilock for a
+                        * while. We have to be careful about truncates or hole
+                        * punchs here - we are not allowed to allocate
+                        * non-delalloc blocks here.
+                        *
+                        * The only protection against truncation is the pages
+                        * for the range we are being asked to convert are
+                        * locked and hence a truncate will block on them
+                        * first.
+                        *
+                        * As a result, if we go beyond the range we really
+                        * need and hit a delalloc extent boundary followed by
+                        * a hole while we have excess blocks in the map, we
+                        * will fill the hole incorrectly and overrun the
+                        * transaction reservation.
+                        *
+                        * Using a single map prevents this as we are forced to
+                        * check each map we look at for overlap with the desired
+                        * range and abort as soon as we find it. Also, given
+                        * that we only return a single map, having one beyond
+                        * what we can return is probably a bit silly.
+                        *
+                        * We also need to check that we don't go beyond EOF;
+                        * this is a truncate optimisation as a truncate sets
+                        * the new file size before blocking on the pages we
+                        * currently have locked under writeback. Because they
+                        * are about to be tossed, we don't need to write them
+                        * back....
                          */
-
+                       nimaps = 1;
                         end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
-                       xfs_bmap_last_offset(NULL, ip, &last_block,
-                               XFS_DATA_FORK);
+                       error = xfs_bmap_last_offset(NULL, ip, &last_block,
+                                                       XFS_DATA_FORK);
+                       if (error)
+                               goto trans_cancel;
+
                         last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
                         if ((map_start_fsb + count_fsb) > last_block) {
                                 count_fsb = last_block - map_start_fsb;
@@ -824,9 +655,9 @@ xfs_iomap_write_allocate(
                         }
  
                         /* Go get the actual blocks */
-                       error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb,
+                       error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
                                         XFS_BMAPI_WRITE, &first_block, 1,
-                                       imap, &nimaps, &free_list, NULL);
+                                       &imap, &nimaps, &free_list, NULL);
                         if (error)
                                 goto trans_cancel;
  
@@ -845,27 +676,24 @@ xfs_iomap_write_allocate(
                  * See if we were able to allocate an extent that
                  * covers at least part of the callers request
                  */
-               for (i = 0; i < nimaps; i++) {
-                       if (unlikely(!imap[i].br_startblock &&
-                                    !(io->io_flags & XFS_IOCORE_RT)))
-                               return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
-                       if ((offset_fsb >= imap[i].br_startoff) &&
-                           (offset_fsb < (imap[i].br_startoff +
-                                          imap[i].br_blockcount))) {
-                               *map = imap[i];
-                               *retmap = 1;
-                               XFS_STATS_INC(xs_xstrat_quick);
-                               return 0;
-                       }
-                       count_fsb -= imap[i].br_blockcount;
+               if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
+                       return xfs_cmn_err_fsblock_zero(ip, &imap);
+
+               if ((offset_fsb >= imap.br_startoff) &&
+                   (offset_fsb < (imap.br_startoff +
+                                  imap.br_blockcount))) {
+                       *map = imap;
+                       *retmap = 1;
+                       XFS_STATS_INC(xs_xstrat_quick);
+                       return 0;
                 }
  
-               /* So far we have not mapped the requested part of the
+               /*
+                * So far we have not mapped the requested part of the
                  * file, just surrounding data, try again.
                  */
-               nimaps--;
-               map_start_fsb = imap[nimaps].br_startoff +
-                               imap[nimaps].br_blockcount;
+               count_fsb -= imap.br_blockcount;
+               map_start_fsb = imap.br_startoff + imap.br_blockcount;
         }
  
  trans_cancel:
@@ -883,7 +711,6 @@ xfs_iomap_write_unwritten(
         size_t          count)
  {
         xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
         xfs_fileoff_t   offset_fsb;
         xfs_filblks_t   count_fsb;
         xfs_filblks_t   numblks_fsb;
@@ -896,13 +723,22 @@ xfs_iomap_write_unwritten(
         int             committed;
         int             error;
  
-       xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
-                               &ip->i_iocore, offset, count);
+       trace_xfs_unwritten_convert(ip, offset, count);
  
         offset_fsb = XFS_B_TO_FSBT(mp, offset);
         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
         count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
  
+       /*
+        * Reserve enough blocks in this transaction for two complete extent
+        * btree splits.  We may be converting the middle part of an unwritten
+        * extent and in this case we will insert two new extents in the btree
+        * each of which could cause a full split.
+        *
+        * This reservation amount will be used in the first call to
+        * xfs_bmbt_split() to select an AG with enough space to satisfy the
+        * rest of the operation.
+        */
         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
  
         do {
@@ -910,8 +746,15 @@ xfs_iomap_write_unwritten(
                  * set up a transaction to convert the range of extents
                  * from unwritten to real. Do allocations in a loop until
                  * we have covered the range passed in.
+                *
+                * Note that we open code the transaction allocation here
+                * to pass KM_NOFS--we can't risk recursing back into
+                * the filesystem here as we might be asked to write out
+                * the same inode that we complete here and might deadlock
+                * on the iolock.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+               xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+               tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
                 tp->t_flags |= XFS_TRANS_RESERVE;
                 error = xfs_trans_reserve(tp, resblks,
                                 XFS_WRITE_LOG_RES(mp), 0,
@@ -929,9 +772,9 @@ xfs_iomap_write_unwritten(
                 /*
                  * Modify the unwritten extent state of the buffer.
                  */
-               XFS_BMAP_INIT(&free_list, &firstfsb);
+               xfs_bmap_init(&free_list, &firstfsb);
                 nimaps = 1;
-               error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb,
+               error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
                                   XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
                                   1, &imap, &nimaps, &free_list, NULL);
                 if (error)
@@ -946,8 +789,7 @@ xfs_iomap_write_unwritten(
                 if (error)
                         return XFS_ERROR(error);
  
-               if (unlikely(!imap.br_startblock &&
-                            !(io->io_flags & XFS_IOCORE_RT)))
+               if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
                         return xfs_cmn_err_fsblock_zero(ip, &imap);
  
                 if ((numblks_fsb = imap.br_blockcount) == 0) {