Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux...
[safe/jmp/linux-2.6] / fs / xfs / xfs_vnodeops.c
index ceecafd..9d376be 100644 (file)
@@ -53,6 +53,7 @@
 #include "xfs_log_priv.h"
 #include "xfs_filestream.h"
 #include "xfs_vnodeops.h"
+#include "xfs_trace.h"
 
 int
 xfs_setattr(
@@ -69,7 +70,6 @@ xfs_setattr(
        uint                    commit_flags=0;
        uid_t                   uid=0, iuid=0;
        gid_t                   gid=0, igid=0;
-       int                     timeflags = 0;
        struct xfs_dquot        *udqp, *gdqp, *olddquot1, *olddquot2;
        int                     need_iolock = 1;
 
@@ -134,16 +134,13 @@ xfs_setattr(
        if (flags & XFS_ATTR_NOLOCK)
                need_iolock = 0;
        if (!(mask & ATTR_SIZE)) {
-               if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
-                   (mp->m_flags & XFS_MOUNT_WSYNC)) {
-                       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-                       commit_flags = 0;
-                       if ((code = xfs_trans_reserve(tp, 0,
-                                                    XFS_ICHANGE_LOG_RES(mp), 0,
-                                                    0, 0))) {
-                               lock_flags = 0;
-                               goto error_return;
-                       }
+               tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+               commit_flags = 0;
+               code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
+                                        0, 0, 0);
+               if (code) {
+                       lock_flags = 0;
+                       goto error_return;
                }
        } else {
                if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -259,7 +256,7 @@ xfs_setattr(
                    iattr->ia_size > ip->i_d.di_size) {
                        code = xfs_flush_pages(ip,
                                        ip->i_d.di_size, iattr->ia_size,
-                                       XFS_B_ASYNC, FI_NONE);
+                                       XBF_ASYNC, FI_NONE);
                }
 
                /* wait for all I/O to complete */
@@ -294,15 +291,23 @@ xfs_setattr(
                 * or we are explicitly asked to change it. This handles
                 * the semantic difference between truncate() and ftruncate()
                 * as implemented in the VFS.
+                *
+                * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
+                * is a special case where we need to update the times despite
+                * not having these flags set.  For all other operations the
+                * VFS sets these flags explicitly if it wants a timestamp
+                * update.
                 */
-               if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
-                       timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+               if (iattr->ia_size != ip->i_size &&
+                   (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+                       iattr->ia_ctime = iattr->ia_mtime =
+                               current_fs_time(inode->i_sb);
+                       mask |= ATTR_CTIME | ATTR_MTIME;
+               }
 
                if (iattr->ia_size > ip->i_size) {
                        ip->i_d.di_size = iattr->ia_size;
                        ip->i_size = iattr->ia_size;
-                       if (!(flags & XFS_ATTR_DMI))
-                               xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                } else if (iattr->ia_size <= ip->i_size ||
                           (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -373,9 +378,6 @@ xfs_setattr(
                        ip->i_d.di_gid = gid;
                        inode->i_gid = gid;
                }
-
-               xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
-               timeflags |= XFS_ICHGTIME_CHG;
        }
 
        /*
@@ -392,51 +394,37 @@ xfs_setattr(
 
                inode->i_mode &= S_IFMT;
                inode->i_mode |= mode & ~S_IFMT;
-
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               timeflags |= XFS_ICHGTIME_CHG;
        }
 
        /*
         * Change file access or modified times.
         */
-       if (mask & (ATTR_ATIME|ATTR_MTIME)) {
-               if (mask & ATTR_ATIME) {
-                       inode->i_atime = iattr->ia_atime;
-                       ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-                       ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-                       ip->i_update_core = 1;
-               }
-               if (mask & ATTR_MTIME) {
-                       inode->i_mtime = iattr->ia_mtime;
-                       ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-                       ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-                       timeflags &= ~XFS_ICHGTIME_MOD;
-                       timeflags |= XFS_ICHGTIME_CHG;
-               }
-               if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
-                       xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+       if (mask & ATTR_ATIME) {
+               inode->i_atime = iattr->ia_atime;
+               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+               ip->i_update_core = 1;
        }
-
-       /*
-        * Change file inode change time only if ATTR_CTIME set
-        * AND we have been called by a DMI function.
-        */
-
-       if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+       if (mask & ATTR_CTIME) {
                inode->i_ctime = iattr->ia_ctime;
                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
                ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
                ip->i_update_core = 1;
-               timeflags &= ~XFS_ICHGTIME_CHG;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
        }
 
        /*
-        * Send out timestamp changes that need to be set to the
-        * current time.  Not done when called by a DMI function.
+        * And finally, log the inode core if any attribute in it
+        * has been changed.
         */
-       if (timeflags && !(flags & XFS_ATTR_DMI))
-               xfs_ichgtime(ip, timeflags);
+       if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
+                   ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
        XFS_STATS_INC(xs_ig_attrchg);
 
@@ -451,12 +439,10 @@ xfs_setattr(
         * mix so this probably isn't worth the trouble to optimize.
         */
        code = 0;
-       if (tp) {
-               if (mp->m_flags & XFS_MOUNT_WSYNC)
-                       xfs_trans_set_sync(tp);
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
 
-               code = xfs_trans_commit(tp, commit_flags);
-       }
+       code = xfs_trans_commit(tp, commit_flags);
 
        xfs_iunlock(ip, lock_flags);
 
@@ -538,9 +524,8 @@ xfs_readlink_bmap(
                d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
                byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 
-               bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
-                                       XBF_LOCK | XBF_MAPPED |
-                                       XBF_DONT_BLOCK);
+               bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
+                                 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
                error = XFS_BUF_GETERROR(bp);
                if (error) {
                        xfs_ioerror_alert("xfs_readlink",
@@ -599,119 +584,9 @@ xfs_readlink(
 }
 
 /*
- * xfs_fsync
- *
- * This is called to sync the inode and its data out to disk.  We need to hold
- * the I/O lock while flushing the data, and the inode lock while flushing the
- * inode.  The inode lock CANNOT be held while flushing the data, so acquire
- * after we're done with that.
+ * Flags for xfs_free_eofblocks
  */
-int
-xfs_fsync(
-       xfs_inode_t     *ip)
-{
-       xfs_trans_t     *tp;
-       int             error;
-       int             log_flushed = 0, changed = 1;
-
-       xfs_itrace_entry(ip);
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return XFS_ERROR(EIO);
-
-       /* capture size updates in I/O completion before writing the inode. */
-       error = xfs_wait_on_pages(ip, 0, -1);
-       if (error)
-               return XFS_ERROR(error);
-
-       /*
-        * We always need to make sure that the required inode state is safe on
-        * disk.  The vnode might be clean but we still might need to force the
-        * log because of committed transactions that haven't hit the disk yet.
-        * Likewise, there could be unflushed non-transactional changes to the
-        * inode core that have to go to disk and this requires us to issue
-        * a synchronous transaction to capture these changes correctly.
-        *
-        * This code relies on the assumption that if the update_* fields
-        * of the inode are clear and the inode is unpinned then it is clean
-        * and no action is required.
-        */
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-       if (!(ip->i_update_size || ip->i_update_core)) {
-               /*
-                * Timestamps/size haven't changed since last inode flush or
-                * inode transaction commit.  That means either nothing got
-                * written or a transaction committed which caught the updates.
-                * If the latter happened and the transaction hasn't hit the
-                * disk yet, the inode will be still be pinned.  If it is,
-                * force the log.
-                */
-
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-               if (xfs_ipincount(ip)) {
-                       error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-                                     XFS_LOG_FORCE | XFS_LOG_SYNC,
-                                     &log_flushed);
-               } else {
-                       /*
-                        * If the inode is not pinned and nothing has changed
-                        * we don't need to flush the cache.
-                        */
-                       changed = 0;
-               }
-       } else  {
-               /*
-                * Kick off a transaction to log the inode core to get the
-                * updates.  The sync transaction will also force the log.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-               tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, 0,
-                               XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return error;
-               }
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * Note - it's possible that we might have pushed ourselves out
-                * of the way during trans_reserve which would flush the inode.
-                * But there's no guarantee that the inode buffer has actually
-                * gone out yet (it's delwri).  Plus the buffer could be pinned
-                * anyway if it's part of an inode in another recent
-                * transaction.  So we play it safe and fire off the
-                * transaction anyway.
-                */
-               xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-               xfs_trans_ihold(tp, ip);
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               xfs_trans_set_sync(tp);
-               error = _xfs_trans_commit(tp, 0, &log_flushed);
-
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-
-       if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
-               /*
-                * If the log write didn't issue an ordered tag we need
-                * to flush the disk cache for the data device now.
-                */
-               if (!log_flushed)
-                       xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
-
-               /*
-                * If this inode is on the RT dev we need to flush that
-                * cache as well.
-                */
-               if (XFS_IS_REALTIME_INODE(ip))
-                       xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
-       }
-
-       return error;
-}
+#define XFS_FREE_EOF_TRYLOCK   (1<<0)
 
 /*
  * This is called by xfs_inactive to free any blocks beyond eof
@@ -731,7 +606,6 @@ xfs_free_eofblocks(
        xfs_filblks_t   map_len;
        int             nimaps;
        xfs_bmbt_irec_t imap;
-       int             use_iolock = (flags & XFS_FREE_EOF_LOCK);
 
        /*
         * Figure out if there are any blocks beyond the end
@@ -773,14 +647,19 @@ xfs_free_eofblocks(
                 * cache and we can't
                 * do that within a transaction.
                 */
-               if (use_iolock)
+               if (flags & XFS_FREE_EOF_TRYLOCK) {
+                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+                               xfs_trans_cancel(tp, 0);
+                               return 0;
+                       }
+               } else {
                        xfs_ilock(ip, XFS_IOLOCK_EXCL);
+               }
                error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
                                    ip->i_size);
                if (error) {
                        xfs_trans_cancel(tp, 0);
-                       if (use_iolock)
-                               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return error;
                }
 
@@ -817,8 +696,7 @@ xfs_free_eofblocks(
                        error = xfs_trans_commit(tp,
                                                XFS_TRANS_RELEASE_LOG_RES);
                }
-               xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
-                                           : XFS_ILOCK_EXCL));
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
        }
        return error;
 }
@@ -1108,7 +986,7 @@ xfs_release(
                 */
                truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
                if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
-                       xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
+                       xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
        }
 
        if (ip->i_d.di_nlink != 0) {
@@ -1118,7 +996,17 @@ xfs_release(
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
                    (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-                       error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+
+                       /*
+                        * If we can't get the iolock just skip truncating
+                        * the blocks past EOF because we could deadlock
+                        * with the mmap_sem otherwise.  We'll get another
+                        * chance to drop them once the last reference to
+                        * the inode is dropped, so we'll never leak blocks
+                        * permanently.
+                        */
+                       error = xfs_free_eofblocks(mp, ip,
+                                                  XFS_FREE_EOF_TRYLOCK);
                        if (error)
                                return error;
                }
@@ -1189,7 +1077,7 @@ xfs_inactive(
                     (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
                      (ip->i_delayed_blks != 0)))) {
-                       error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+                       error = xfs_free_eofblocks(mp, ip, 0);
                        if (error)
                                return VN_INACTIVE_CACHE;
                }
@@ -1385,7 +1273,6 @@ xfs_lookup(
        if (error)
                goto out_free_name;
 
-       xfs_itrace_ref(*ipp);
        return 0;
 
 out_free_name:
@@ -1476,8 +1363,8 @@ xfs_create(
        if (error == ENOSPC) {
                /* flush outstanding delalloc blocks and retry */
                xfs_flush_inodes(dp);
-               error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
-                       XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+               error = xfs_trans_reserve(tp, resblks, log_res, 0,
+                               XFS_TRANS_PERM_LOG_RES, log_count);
        }
        if (error == ENOSPC) {
                /* No space at all so try a "no-allocation" reservation */
@@ -1531,7 +1418,6 @@ xfs_create(
         * At this point, we've gotten a newly allocated inode.
         * It is locked (and joined to the transaction).
         */
-       xfs_itrace_ref(ip);
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
        /*
@@ -1991,9 +1877,6 @@ xfs_remove(
        if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
                xfs_filestream_deassociate(ip);
 
-       xfs_itrace_exit(ip);
-       xfs_itrace_exit(dp);
-
  std_return:
        if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
                XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
@@ -2206,7 +2089,8 @@ xfs_symlink(
        if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
                                        DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
-                                       link_name->name, target_path, 0, 0, 0);
+                                       link_name->name,
+                                       (unsigned char *)target_path, 0, 0, 0);
                if (error)
                        return error;
        }
@@ -2290,7 +2174,6 @@ xfs_symlink(
                        goto error_return;
                goto error1;
        }
-       xfs_itrace_ref(ip);
 
        /*
         * An error after we've joined dp to the transaction will result in the
@@ -2403,7 +2286,8 @@ std_return:
                                        dp, DM_RIGHT_NULL,
                                        error ? NULL : ip,
                                        DM_RIGHT_NULL, link_name->name,
-                                       target_path, 0, error, 0);
+                                       (unsigned char *)target_path,
+                                       0, error, 0);
        }
 
        if (!error)
@@ -2461,52 +2345,6 @@ xfs_set_dmattrs(
        return error;
 }
 
-int
-xfs_reclaim(
-       xfs_inode_t     *ip)
-{
-
-       xfs_itrace_entry(ip);
-
-       ASSERT(!VN_MAPPED(VFS_I(ip)));
-
-       /* bad inode, get out here ASAP */
-       if (is_bad_inode(VFS_I(ip))) {
-               xfs_ireclaim(ip);
-               return 0;
-       }
-
-       xfs_ioend_wait(ip);
-
-       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
-       /*
-        * Make sure the atime in the XFS inode is correct before freeing the
-        * Linux inode.
-        */
-       xfs_synchronize_atime(ip);
-
-       /*
-        * If we have nothing to flush with this inode then complete the
-        * teardown now, otherwise break the link between the xfs inode and the
-        * linux inode and clean up the xfs inode later. This avoids flushing
-        * the inode to disk during the delete operation itself.
-        *
-        * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
-        * first to ensure that xfs_iunpin() will never see an xfs inode
-        * that has a linux inode being reclaimed. Synchronisation is provided
-        * by the i_flags_lock.
-        */
-       if (!ip->i_update_core && (ip->i_itemp == NULL)) {
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               xfs_iflock(ip);
-               xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-               return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
-       }
-       xfs_inode_set_reclaim_tag(ip);
-       return 0;
-}
-
 /*
  * xfs_alloc_file_space()
  *      This routine allocates disk space for the given file.
@@ -2879,7 +2717,6 @@ xfs_free_file_space(
        ioffset = offset & ~(rounding - 1);
 
        if (VN_CACHED(VFS_I(ip)) != 0) {
-               xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
                error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
                if (error)
                        goto out_unlock_iolock;