xfs: fix mmap_sem/iolock inversion in xfs_free_eofblocks
authorChristoph Hellwig <hch@infradead.org>
Mon, 19 Oct 2009 04:03:46 +0000 (04:03 +0000)
committerAlex Elder <aelder@sgi.com>
Fri, 11 Dec 2009 21:11:19 +0000 (15:11 -0600)
When xfs_free_eofblocks is called from ->release the VM might already
hold the mmap_sem, but in the write path we take the iolock before
taking the mmap_sem in the generic write code.

Switch xfs_free_eofblocks to only trylock the iolock if called from
->release and skip trimming the prellocated blocks in that case.
We'll still free them later on the final iput.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
fs/xfs/xfs_rw.h
fs/xfs/xfs_vnodeops.c

index f5e4874..726014d 100644 (file)
@@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 }
 
 /*
- * Flags for xfs_free_eofblocks
- */
-#define XFS_FREE_EOF_LOCK      (1<<0)
-#define XFS_FREE_EOF_NOLOCK    (1<<1)
-
-
-/*
  * helper function to extract extent size hint from inode
  */
 STATIC_INLINE xfs_extlen_t
index 3fac146..d984014 100644 (file)
@@ -709,6 +709,11 @@ xfs_fsync(
 }
 
 /*
+ * Flags for xfs_free_eofblocks
+ */
+#define XFS_FREE_EOF_TRYLOCK   (1<<0)
+
+/*
  * This is called by xfs_inactive to free any blocks beyond eof
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
@@ -726,7 +731,6 @@ xfs_free_eofblocks(
        xfs_filblks_t   map_len;
        int             nimaps;
        xfs_bmbt_irec_t imap;
-       int             use_iolock = (flags & XFS_FREE_EOF_LOCK);
 
        /*
         * Figure out if there are any blocks beyond the end
@@ -768,14 +772,19 @@ xfs_free_eofblocks(
                 * cache and we can't
                 * do that within a transaction.
                 */
-               if (use_iolock)
+               if (flags & XFS_FREE_EOF_TRYLOCK) {
+                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+                               xfs_trans_cancel(tp, 0);
+                               return 0;
+                       }
+               } else {
                        xfs_ilock(ip, XFS_IOLOCK_EXCL);
+               }
                error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
                                    ip->i_size);
                if (error) {
                        xfs_trans_cancel(tp, 0);
-                       if (use_iolock)
-                               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return error;
                }
 
@@ -812,8 +821,7 @@ xfs_free_eofblocks(
                        error = xfs_trans_commit(tp,
                                                XFS_TRANS_RELEASE_LOG_RES);
                }
-               xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
-                                           : XFS_ILOCK_EXCL));
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
        }
        return error;
 }
@@ -1113,7 +1121,17 @@ xfs_release(
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
                    (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-                       error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+
+                       /*
+                        * If we can't get the iolock just skip truncating
+                        * the blocks past EOF because we could deadlock
+                        * with the mmap_sem otherwise.  We'll get another
+                        * chance to drop them once the last reference to
+                        * the inode is dropped, so we'll never leak blocks
+                        * permanently.
+                        */
+                       error = xfs_free_eofblocks(mp, ip,
+                                                  XFS_FREE_EOF_TRYLOCK);
                        if (error)
                                return error;
                }
@@ -1184,7 +1202,7 @@ xfs_inactive(
                     (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
                      (ip->i_delayed_blks != 0)))) {
-                       error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+                       error = xfs_free_eofblocks(mp, ip, 0);
                        if (error)
                                return VN_INACTIVE_CACHE;
                }