Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[safe/jmp/linux-2.6] / fs / xfs / linux-2.6 / xfs_super.c
index c71e226..29f1edc 100644 (file)
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+
 #include "xfs.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
@@ -43,7 +44,6 @@
 #include "xfs_itable.h"
 #include "xfs_fsops.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_trans_priv.h"
 #include "xfs_filestream.h"
 #include "xfs_da_btree.h"
-#include "xfs_dir2_trace.h"
 #include "xfs_extfree_item.h"
 #include "xfs_mru_cache.h"
 #include "xfs_inode_item.h"
 #include "xfs_sync.h"
+#include "xfs_trace.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/mempool.h>
 #include <linux/writeback.h>
@@ -68,8 +69,7 @@
 #include <linux/freezer.h>
 #include <linux/parser.h>
 
-static struct quotactl_ops xfs_quotactl_operations;
-static struct super_operations xfs_super_operations;
+static const struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
 
@@ -79,7 +79,6 @@ mempool_t *xfs_ioend_pool;
 #define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
 #define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
 #define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_INO64   "ino64"         /* force inodes into 64-bit range */
 #define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
 #define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
 #define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
@@ -180,7 +179,7 @@ xfs_parseargs(
        int                     dswidth = 0;
        int                     iosize = 0;
        int                     dmapi_implies_ikeep = 1;
-       uchar_t                 iosizelog = 0;
+       __uint8_t               iosizelog = 0;
 
        /*
         * Copy binary VFS mount flags we are interested in.
@@ -291,16 +290,6 @@ xfs_parseargs(
                        mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
                } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
                        mp->m_flags |= XFS_MOUNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_INO64)) {
-#if XFS_BIG_INUMS
-                       mp->m_flags |= XFS_MOUNT_INO64;
-                       mp->m_inoadd = XFS_INO64_OFFSET;
-#else
-                       cmn_err(CE_WARN,
-                               "XFS: %s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-#endif
                } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
                        mp->m_flags |= XFS_MOUNT_NOALIGN;
                } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
@@ -417,6 +406,14 @@ xfs_parseargs(
                return EINVAL;
        }
 
+#ifndef CONFIG_XFS_QUOTA
+       if (XFS_IS_QUOTA_RUNNING(mp)) {
+               cmn_err(CE_WARN,
+                       "XFS: quota support not available in this kernel.");
+               return EINVAL;
+       }
+#endif
+
        if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
            (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
                cmn_err(CE_WARN,
@@ -529,7 +526,6 @@ xfs_showargs(
                /* the few simple ones we can get from the mount struct */
                { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
                { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_INO64,              "," MNTOPT_INO64 },
                { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
                { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
                { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
@@ -585,15 +581,19 @@ xfs_showargs(
        else if (mp->m_qflags & XFS_UQUOTA_ACCT)
                seq_puts(m, "," MNTOPT_UQUOTANOENF);
 
-       if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_PRJQUOTA);
-       else if (mp->m_qflags & XFS_PQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_PQUOTANOENF);
-
-       if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_GRPQUOTA);
-       else if (mp->m_qflags & XFS_GQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       /* Either project or group quotas can be active, not both */
+
+       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       }
 
        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
                seq_puts(m, "," MNTOPT_NOQUOTA);
@@ -622,7 +622,7 @@ xfs_max_file_offset(
         */
 
 #if BITS_PER_LONG == 32
-# if defined(CONFIG_LBD)
+# if defined(CONFIG_LBDAF)
        ASSERT(sizeof(sector_t) == 8);
        pagefactor = PAGE_CACHE_SIZE;
        bitshift = BITS_PER_LONG;
@@ -634,7 +634,7 @@ xfs_max_file_offset(
        return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
 
-int
+STATIC int
 xfs_blkdev_get(
        xfs_mount_t             *mp,
        const char              *name,
@@ -651,7 +651,7 @@ xfs_blkdev_get(
        return -error;
 }
 
-void
+STATIC void
 xfs_blkdev_put(
        struct block_device     *bdev)
 {
@@ -693,7 +693,7 @@ xfs_barrier_test(
        return error;
 }
 
-void
+STATIC void
 xfs_mountfs_check_barriers(xfs_mount_t *mp)
 {
        int error;
@@ -734,15 +734,15 @@ xfs_close_devices(
 {
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
                struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-               xfs_free_buftarg(mp->m_logdev_targp);
+               xfs_free_buftarg(mp, mp->m_logdev_targp);
                xfs_blkdev_put(logdev);
        }
        if (mp->m_rtdev_targp) {
                struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-               xfs_free_buftarg(mp->m_rtdev_targp);
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
                xfs_blkdev_put(rtdev);
        }
-       xfs_free_buftarg(mp->m_ddev_targp);
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
 }
 
 /*
@@ -811,9 +811,9 @@ xfs_open_devices(
 
  out_free_rtdev_targ:
        if (mp->m_rtdev_targp)
-               xfs_free_buftarg(mp->m_rtdev_targp);
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
  out_free_ddev_targ:
-       xfs_free_buftarg(mp->m_ddev_targp);
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
  out_close_rtdev:
        if (rtdev)
                xfs_blkdev_put(rtdev);
@@ -872,18 +872,17 @@ xfsaild_wakeup(
        wake_up_process(ailp->xa_task);
 }
 
-int
+STATIC int
 xfsaild(
        void    *data)
 {
        struct xfs_ail  *ailp = data;
        xfs_lsn_t       last_pushed_lsn = 0;
-       long            tout = 0;
+       long            tout = 0; /* milliseconds */
 
        while (!kthread_should_stop()) {
-               if (tout)
-                       schedule_timeout_interruptible(msecs_to_jiffies(tout));
-               tout = 1000;
+               schedule_timeout_interruptible(tout ?
+                               msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
 
                /* swsusp */
                try_to_freeze();
@@ -932,13 +931,37 @@ xfs_fs_alloc_inode(
  */
 STATIC void
 xfs_fs_destroy_inode(
-       struct inode    *inode)
+       struct inode            *inode)
 {
-       xfs_inode_t             *ip = XFS_I(inode);
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       xfs_itrace_entry(ip);
 
        XFS_STATS_INC(vn_reclaim);
-       if (xfs_reclaim(ip))
-               panic("%s: cannot reclaim 0x%p\n", __func__, inode);
+
+       /* bad inode, get out here ASAP */
+       if (is_bad_inode(inode))
+               goto out_reclaim;
+
+       xfs_ioend_wait(ip);
+
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+       /*
+        * We should never get here with one of the reclaim flags already set.
+        */
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+       /*
+        * We always use background reclaim here because even if the
+        * inode is clean, it still may be under IO and hence we have
+        * to take the flush lock. The background reclaim path handles
+        * this more efficiently than we can here, so simply let background
+        * reclaim tear down all inodes.
+        */
+out_reclaim:
+       xfs_inode_set_reclaim_tag(ip);
 }
 
 /*
@@ -975,41 +998,142 @@ xfs_fs_inode_init_once(
 
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
-       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 }
 
 /*
- * Attempt to flush the inode, this will actually fail
- * if the inode is pinned, but we dirty the inode again
- * at the point when it is unpinned after a log write,
- * since this is when the inode itself becomes flushable.
+ * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
+ * we catch unlogged VFS level updates to the inode. Care must be taken
+ * here - the transaction code calls mark_inode_dirty_sync() to mark the
+ * VFS inode dirty in a transaction and clears the i_update_core field;
+ * it must clear the field after calling mark_inode_dirty_sync() to
+ * correctly indicate that the dirty state has been propagated into the
+ * inode log item.
+ *
+ * We need the barrier() to maintain correct ordering between unlogged
+ * updates and the transaction commit code that clears the i_update_core
+ * field. This requires all updates to be completed before marking the
+ * inode dirty.
  */
+STATIC void
+xfs_fs_dirty_inode(
+       struct inode    *inode)
+{
+       barrier();
+       XFS_I(inode)->i_update_core = 1;
+}
+
+STATIC int
+xfs_log_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               /* we need to return with the lock hold shared */
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               return error;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Note - it's possible that we might have pushed ourselves out of the
+        * way during trans_reserve which would flush the inode.  But there's
+        * no guarantee that the inode buffer has actually gone out yet (it's
+        * delwri).  Plus the buffer could be pinned anyway if it's part of
+        * an inode in another recent transaction.  So we play it safe and
+        * fire off the transaction anyway.
+        */
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ihold(tp, ip);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+
+       return error;
+}
+
 STATIC int
 xfs_fs_write_inode(
        struct inode            *inode,
-       int                     sync)
+       struct writeback_control *wbc)
 {
        struct xfs_inode        *ip = XFS_I(inode);
-       int                     error = 0;
-       int                     flags = 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = EAGAIN;
 
        xfs_itrace_entry(ip);
-       if (sync) {
-               error = xfs_wait_on_pages(ip, 0, -1);
-               if (error)
-                       goto out_error;
-               flags |= FLUSH_SYNC;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       if (wbc->sync_mode == WB_SYNC_ALL) {
+               /*
+                * Make sure the inode has hit stable storage.  By using the
+                * log and the fsync transactions we reduce the IOs we have
+                * to do here from two (log and inode) to just the log.
+                *
+                * Note: We still need to do a delwri write of the inode after
+                * this to flush it to the backing buffer so that bulkstat
+                * works properly if this is the first time the inode has been
+                * written.  Because we hold the ilock atomically over the
+                * transaction commit and the inode flush we are guaranteed
+                * that the inode is not pinned when it returns. If the flush
+                * lock is already held, then the inode has already been
+                * flushed once and we don't need to flush it again.  Hence
+                * the code will only flush the inode if it isn't already
+                * being flushed.
+                */
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               if (ip->i_update_core) {
+                       error = xfs_log_inode(ip);
+                       if (error)
+                               goto out_unlock;
+               }
+       } else {
+               /*
+                * We make this non-blocking if the inode is contended, return
+                * EAGAIN to indicate to the caller that they did not succeed.
+                * This prevents the flush path from blocking on inodes inside
+                * another operation right now, they get caught later by xfs_sync.
+                */
+               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+                       goto out;
+       }
+
+       if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+               goto out_unlock;
+
+       /*
+        * Now we have the flush lock and the inode is not pinned, we can check
+        * if the inode is really clean as we know that there are no pending
+        * transaction completions, it is not waiting on the delayed write
+        * queue and there is no IO in progress.
+        */
+       if (xfs_inode_clean(ip)) {
+               xfs_ifunlock(ip);
+               error = 0;
+               goto out_unlock;
        }
-       error = xfs_inode_flush(ip, flags);
+       error = xfs_iflush(ip, 0);
 
-out_error:
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
        /*
         * if we failed to write out the inode then mark
         * it dirty again so we'll try again later.
         */
        if (error)
                xfs_mark_inode_dirty_sync(ip);
-
        return -error;
 }
 
@@ -1024,6 +1148,20 @@ xfs_fs_clear_inode(
        XFS_STATS_INC(vn_remove);
        XFS_STATS_DEC(vn_active);
 
+       /*
+        * The iolock is used by the file system to coordinate reads,
+        * writes, and block truncates.  Up to this point the lock
+        * protected concurrent accesses by users of the inode.  But
+        * from here forward we're doing some final processing of the
+        * inode because we're done with it, and although we reuse the
+        * iolock for protection it is really a distinct lock class
+        * (in the lockdep sense) from before.  To keep lockdep happy
+        * (and basically indicate what we are doing), we explicitly
+        * re-init the iolock here.
+        */
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+
        xfs_inactive(ip);
 }
 
@@ -1041,26 +1179,22 @@ xfs_fs_put_super(
        struct super_block      *sb)
 {
        struct xfs_mount        *mp = XFS_M(sb);
-       struct xfs_inode        *rip = mp->m_rootip;
-       int                     unmount_event_flags = 0;
 
        xfs_syncd_stop(mp);
-       xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
 
-#ifdef HAVE_DMAPI
-       if (mp->m_flags & XFS_MOUNT_DMAPI) {
-               unmount_event_flags =
-                       (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
-                               0 : DM_FLAGS_UNWANTED;
+       if (!(sb->s_flags & MS_RDONLY)) {
                /*
-                * Ignore error from dmapi here, first unmount is not allowed
-                * to fail anyway, and second we wouldn't want to fail a
-                * unmount because of dmapi.
+                * XXX(hch): this should be SYNC_WAIT.
+                *
+                * Or more likely not needed at all because the VFS is already
+                * calling ->sync_fs after shutting down all filestem
+                * operations and just before calling ->put_super.
                 */
-               XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
-                               NULL, NULL, 0, 0, unmount_event_flags);
+               xfs_sync_data(mp, 0);
+               xfs_sync_attr(mp, 0);
        }
-#endif
+
+       XFS_SEND_PREUNMOUNT(mp);
 
        /*
         * Blow away any referenced inode in the filestreams cache.
@@ -1071,32 +1205,20 @@ xfs_fs_put_super(
 
        XFS_bflush(mp->m_ddev_targp);
 
-       if (mp->m_flags & XFS_MOUNT_DMAPI) {
-               XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
-                               unmount_event_flags);
-       }
+       XFS_SEND_UNMOUNT(mp);
 
        xfs_unmountfs(mp);
        xfs_freesb(mp);
+       xfs_inode_shrinker_unregister(mp);
        xfs_icsb_destroy_counters(mp);
        xfs_close_devices(mp);
-       xfs_qmops_put(mp);
        xfs_dmops_put(mp);
        xfs_free_fsname(mp);
        kfree(mp);
 }
 
-STATIC void
-xfs_fs_write_super(
-       struct super_block      *sb)
-{
-       if (!(sb->s_flags & MS_RDONLY))
-               xfs_sync_fsdata(XFS_M(sb), 0);
-       sb->s_dirt = 0;
-}
-
 STATIC int
-xfs_fs_sync_super(
+xfs_fs_sync_fs(
        struct super_block      *sb,
        int                     wait)
 {
@@ -1104,24 +1226,23 @@ xfs_fs_sync_super(
        int                     error;
 
        /*
-        * Treat a sync operation like a freeze.  This is to work
-        * around a race in sync_inodes() which works in two phases
-        * - an asynchronous flush, which can write out an inode
-        * without waiting for file size updates to complete, and a
-        * synchronous flush, which wont do anything because the
-        * async flush removed the inode's dirty flag.  Also
-        * sync_inodes() will not see any files that just have
-        * outstanding transactions to be flushed because we don't
-        * dirty the Linux inode until after the transaction I/O
-        * completes.
+        * Not much we can do for the first async pass.  Writing out the
+        * superblock would be counter-productive as we are going to redirty
+        * when writing out other data and metadata (and writing out a single
+        * block is quite fast anyway).
+        *
+        * Try to asynchronously kick off quota syncing at least.
         */
-       if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
-               error = xfs_quiesce_data(mp);
-       else
-               error = xfs_sync_fsdata(mp, 0);
-       sb->s_dirt = 0;
+       if (!wait) {
+               xfs_qm_sync(mp, SYNC_TRYLOCK);
+               return 0;
+       }
+
+       error = xfs_quiesce_data(mp);
+       if (error)
+               return -error;
 
-       if (unlikely(laptop_mode)) {
+       if (laptop_mode) {
                int     prev_sync_seq = mp->m_sync_seq;
 
                /*
@@ -1140,7 +1261,7 @@ xfs_fs_sync_super(
                                mp->m_sync_seq != prev_sync_seq);
        }
 
-       return -error;
+       return 0;
 }
 
 STATIC int
@@ -1150,6 +1271,7 @@ xfs_fs_statfs(
 {
        struct xfs_mount        *mp = XFS_M(dentry->d_sb);
        xfs_sb_t                *sbp = &mp->m_sb;
+       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
        __uint64_t              fakeinos, id;
        xfs_extlen_t            lsize;
 
@@ -1169,25 +1291,45 @@ xfs_fs_statfs(
        statp->f_bfree = statp->f_bavail =
                                sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
        fakeinos = statp->f_bfree << sbp->sb_inopblog;
-#if XFS_BIG_INUMS
-       fakeinos += mp->m_inoadd;
-#endif
        statp->f_files =
            MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
        if (mp->m_maxicount)
-#if XFS_BIG_INUMS
-               if (!mp->m_inoadd)
-#endif
-                       statp->f_files = min_t(typeof(statp->f_files),
-                                               statp->f_files,
-                                               mp->m_maxicount);
+               statp->f_files = min_t(typeof(statp->f_files),
+                                       statp->f_files,
+                                       mp->m_maxicount);
        statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
        spin_unlock(&mp->m_sb_lock);
 
-       XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+               xfs_qm_statvfs(ip, statp);
        return 0;
 }
 
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks = 0;
+
+       mp->m_resblks_save = mp->m_resblks;
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks;
+
+       if (mp->m_resblks_save) {
+               resblks = mp->m_resblks_save;
+               mp->m_resblks_save = 0;
+       } else
+               resblks = xfs_default_resblks(mp);
+
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
 STATIC int
 xfs_fs_remount(
        struct super_block      *sb,
@@ -1267,11 +1409,27 @@ xfs_fs_remount(
                        }
                        mp->m_update_flags = 0;
                }
+
+               /*
+                * Fill out the reserve pool if it is empty. Use the stashed
+                * value if it is non-zero, otherwise go with the default.
+                */
+               xfs_restore_resvblks(mp);
        }
 
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+               /*
+                * After we have synced the data but before we sync the
+                * metadata, we need to free up the reserve block pool so that
+                * the used block count in the superblock on disk is correct at
+                * the end of the remount. Stash the current reserve pool size
+                * so that if we get remounted rw, we can return it to the same
+                * size.
+                */
+
                xfs_quiesce_data(mp);
+               xfs_save_resvblks(mp);
                xfs_quiesce_attr(mp);
                mp->m_flags |= XFS_MOUNT_RDONLY;
        }
@@ -1290,67 +1448,27 @@ xfs_fs_freeze(
 {
        struct xfs_mount        *mp = XFS_M(sb);
 
+       xfs_save_resvblks(mp);
        xfs_quiesce_attr(mp);
        return -xfs_fs_log_dummy(mp);
 }
 
 STATIC int
-xfs_fs_show_options(
-       struct seq_file         *m,
-       struct vfsmount         *mnt)
-{
-       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
-}
-
-STATIC int
-xfs_fs_quotasync(
-       struct super_block      *sb,
-       int                     type)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XQUOTASYNC, 0, NULL);
-}
-
-STATIC int
-xfs_fs_getxstate(
-       struct super_block      *sb,
-       struct fs_quota_stat    *fqs)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XGETQSTAT, 0, (caddr_t)fqs);
-}
-
-STATIC int
-xfs_fs_setxstate(
-       struct super_block      *sb,
-       unsigned int            flags,
-       int                     op)
+xfs_fs_unfreeze(
+       struct super_block      *sb)
 {
-       return -XFS_QM_QUOTACTL(XFS_M(sb), op, 0, (caddr_t)&flags);
-}
+       struct xfs_mount        *mp = XFS_M(sb);
 
-STATIC int
-xfs_fs_getxquota(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb),
-                                (type == USRQUOTA) ? Q_XGETQUOTA :
-                                 ((type == GRPQUOTA) ? Q_XGETGQUOTA :
-                                  Q_XGETPQUOTA), id, (caddr_t)fdq);
+       xfs_restore_resvblks(mp);
+       return 0;
 }
 
 STATIC int
-xfs_fs_setxquota(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
+xfs_fs_show_options(
+       struct seq_file         *m,
+       struct vfsmount         *mnt)
 {
-       return -XFS_QM_QUOTACTL(XFS_M(sb),
-                                (type == USRQUOTA) ? Q_XSETQLIM :
-                                 ((type == GRPQUOTA) ? Q_XSETGQLIM :
-                                  Q_XSETPQLIM), id, (caddr_t)fdq);
+       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
 }
 
 /*
@@ -1435,22 +1553,21 @@ xfs_fs_fill_super(
        sb_min_blocksize(sb, BBSIZE);
        sb->s_xattr = xfs_xattr_handlers;
        sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
        sb->s_qcop = &xfs_quotactl_operations;
+#endif
        sb->s_op = &xfs_super_operations;
 
        error = xfs_dmops_get(mp);
        if (error)
                goto out_free_fsname;
-       error = xfs_qmops_get(mp);
-       if (error)
-               goto out_put_dmops;
 
        if (silent)
                flags |= XFS_MFSI_QUIET;
 
        error = xfs_open_devices(mp);
        if (error)
-               goto out_put_qmops;
+               goto out_put_dmops;
 
        if (xfs_icsb_init_counters(mp))
                mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1480,7 +1597,6 @@ xfs_fs_fill_super(
 
        XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
 
-       sb->s_dirt = 1;
        sb->s_magic = XFS_SB_MAGIC;
        sb->s_blocksize = mp->m_sb.sb_blocksize;
        sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1507,9 +1623,9 @@ xfs_fs_fill_super(
        if (error)
                goto fail_vnrele;
 
-       kfree(mtpt);
+       xfs_inode_shrinker_register(mp);
 
-       xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
+       kfree(mtpt);
        return 0;
 
  out_filestream_unmount:
@@ -1519,8 +1635,6 @@ xfs_fs_fill_super(
  out_destroy_counters:
        xfs_icsb_destroy_counters(mp);
        xfs_close_devices(mp);
- out_put_qmops:
-       xfs_qmops_put(mp);
  out_put_dmops:
        xfs_dmops_put(mp);
  out_free_fsname:
@@ -1564,28 +1678,21 @@ xfs_fs_get_sb(
                           mnt);
 }
 
-static struct super_operations xfs_super_operations = {
+static const struct super_operations xfs_super_operations = {
        .alloc_inode            = xfs_fs_alloc_inode,
        .destroy_inode          = xfs_fs_destroy_inode,
+       .dirty_inode            = xfs_fs_dirty_inode,
        .write_inode            = xfs_fs_write_inode,
        .clear_inode            = xfs_fs_clear_inode,
        .put_super              = xfs_fs_put_super,
-       .write_super            = xfs_fs_write_super,
-       .sync_fs                = xfs_fs_sync_super,
+       .sync_fs                = xfs_fs_sync_fs,
        .freeze_fs              = xfs_fs_freeze,
+       .unfreeze_fs            = xfs_fs_unfreeze,
        .statfs                 = xfs_fs_statfs,
        .remount_fs             = xfs_fs_remount,
        .show_options           = xfs_fs_show_options,
 };
 
-static struct quotactl_ops xfs_quotactl_operations = {
-       .quota_sync             = xfs_fs_quotasync,
-       .get_xstate             = xfs_fs_getxstate,
-       .set_xstate             = xfs_fs_setxstate,
-       .get_xquota             = xfs_fs_getxquota,
-       .set_xquota             = xfs_fs_setxquota,
-};
-
 static struct file_system_type xfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "xfs",
@@ -1595,94 +1702,6 @@ static struct file_system_type xfs_fs_type = {
 };
 
 STATIC int __init
-xfs_alloc_trace_bufs(void)
-{
-#ifdef XFS_ALLOC_TRACE
-       xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
-       if (!xfs_alloc_trace_buf)
-               goto out;
-#endif
-#ifdef XFS_BMAP_TRACE
-       xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
-       if (!xfs_bmap_trace_buf)
-               goto out_free_alloc_trace;
-#endif
-#ifdef XFS_BTREE_TRACE
-       xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
-                                            KM_MAYFAIL);
-       if (!xfs_allocbt_trace_buf)
-               goto out_free_bmap_trace;
-
-       xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
-       if (!xfs_inobt_trace_buf)
-               goto out_free_allocbt_trace;
-
-       xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
-       if (!xfs_bmbt_trace_buf)
-               goto out_free_inobt_trace;
-#endif
-#ifdef XFS_ATTR_TRACE
-       xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
-       if (!xfs_attr_trace_buf)
-               goto out_free_bmbt_trace;
-#endif
-#ifdef XFS_DIR2_TRACE
-       xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
-       if (!xfs_dir2_trace_buf)
-               goto out_free_attr_trace;
-#endif
-
-       return 0;
-
-#ifdef XFS_DIR2_TRACE
- out_free_attr_trace:
-#endif
-#ifdef XFS_ATTR_TRACE
-       ktrace_free(xfs_attr_trace_buf);
- out_free_bmbt_trace:
-#endif
-#ifdef XFS_BTREE_TRACE
-       ktrace_free(xfs_bmbt_trace_buf);
- out_free_inobt_trace:
-       ktrace_free(xfs_inobt_trace_buf);
- out_free_allocbt_trace:
-       ktrace_free(xfs_allocbt_trace_buf);
- out_free_bmap_trace:
-#endif
-#ifdef XFS_BMAP_TRACE
-       ktrace_free(xfs_bmap_trace_buf);
- out_free_alloc_trace:
-#endif
-#ifdef XFS_ALLOC_TRACE
-       ktrace_free(xfs_alloc_trace_buf);
- out:
-#endif
-       return -ENOMEM;
-}
-
-STATIC void
-xfs_free_trace_bufs(void)
-{
-#ifdef XFS_DIR2_TRACE
-       ktrace_free(xfs_dir2_trace_buf);
-#endif
-#ifdef XFS_ATTR_TRACE
-       ktrace_free(xfs_attr_trace_buf);
-#endif
-#ifdef XFS_BTREE_TRACE
-       ktrace_free(xfs_bmbt_trace_buf);
-       ktrace_free(xfs_inobt_trace_buf);
-       ktrace_free(xfs_allocbt_trace_buf);
-#endif
-#ifdef XFS_BMAP_TRACE
-       ktrace_free(xfs_bmap_trace_buf);
-#endif
-#ifdef XFS_ALLOC_TRACE
-       ktrace_free(xfs_alloc_trace_buf);
-#endif
-}
-
-STATIC int __init
 xfs_init_zones(void)
 {
 
@@ -1763,18 +1782,8 @@ xfs_init_zones(void)
        if (!xfs_ili_zone)
                goto out_destroy_inode_zone;
 
-#ifdef CONFIG_XFS_POSIX_ACL
-       xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
-       if (!xfs_acl_zone)
-               goto out_destroy_ili_zone;
-#endif
-
        return 0;
 
-#ifdef CONFIG_XFS_POSIX_ACL
- out_destroy_ili_zone:
-#endif
-       kmem_zone_destroy(xfs_ili_zone);
  out_destroy_inode_zone:
        kmem_zone_destroy(xfs_inode_zone);
  out_destroy_efi_zone:
@@ -1808,9 +1817,6 @@ xfs_init_zones(void)
 STATIC void
 xfs_destroy_zones(void)
 {
-#ifdef CONFIG_XFS_POSIX_ACL
-       kmem_zone_destroy(xfs_acl_zone);
-#endif
        kmem_zone_destroy(xfs_ili_zone);
        kmem_zone_destroy(xfs_inode_zone);
        kmem_zone_destroy(xfs_efi_zone);
@@ -1836,7 +1842,6 @@ init_xfs_fs(void)
        printk(KERN_INFO XFS_VERSION_STRING " with "
                         XFS_BUILD_OPTIONS " enabled\n");
 
-       ktrace_init(64);
        xfs_ioend_init();
        xfs_dir_startup();
 
@@ -1844,13 +1849,9 @@ init_xfs_fs(void)
        if (error)
                goto out;
 
-       error = xfs_alloc_trace_bufs();
-       if (error)
-               goto out_destroy_zones;
-
        error = xfs_mru_cache_init();
        if (error)
-               goto out_free_trace_buffers;
+               goto out_destroy_zones;
 
        error = xfs_filestream_init();
        if (error)
@@ -1869,6 +1870,7 @@ init_xfs_fs(void)
                goto out_cleanup_procfs;
 
        vfs_initquota();
+       xfs_inode_shrinker_init();
 
        error = register_filesystem(&xfs_fs_type);
        if (error)
@@ -1885,8 +1887,6 @@ init_xfs_fs(void)
        xfs_filestream_uninit();
  out_mru_cache_uninit:
        xfs_mru_cache_uninit();
- out_free_trace_buffers:
-       xfs_free_trace_bufs();
  out_destroy_zones:
        xfs_destroy_zones();
  out:
@@ -1898,14 +1898,13 @@ exit_xfs_fs(void)
 {
        vfs_exitquota();
        unregister_filesystem(&xfs_fs_type);
+       xfs_inode_shrinker_destroy();
        xfs_sysctl_unregister();
        xfs_cleanup_procfs();
        xfs_buf_terminate();
        xfs_filestream_uninit();
        xfs_mru_cache_uninit();
-       xfs_free_trace_bufs();
        xfs_destroy_zones();
-       ktrace_uninit();
 }
 
 module_init(init_xfs_fs);