xfs: Don't wake the aild once per second
[safe/jmp/linux-2.6] / fs / xfs / linux-2.6 / xfs_super.c
index 4b6ddf8..9f2e398 100644 (file)
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+
 #include "xfs.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_clnt.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -36,6 +36,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
 #include "xfs_rtalloc.h"
 #include "xfs_itable.h"
 #include "xfs_fsops.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_vnodeops.h"
-#include "xfs_vfsops.h"
 #include "xfs_version.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
 #include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
+#include "xfs_sync.h"
+#include "xfs_trace.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
 #include <linux/writeback.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/parser.h>
 
-static struct quotactl_ops xfs_quotactl_operations;
-static struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_vnode_zone;
+static const struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
 
-STATIC struct xfs_mount_args *
-xfs_args_allocate(
-       struct super_block      *sb,
-       int                     silent)
-{
-       struct xfs_mount_args   *args;
-
-       args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
-       args->logbufs = args->logbufsize = -1;
-       strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-
-       /* Copy the already-parsed mount(2) flags we're interested in */
-       if (sb->s_flags & MS_DIRSYNC)
-               args->flags |= XFSMNT_DIRSYNC;
-       if (sb->s_flags & MS_SYNCHRONOUS)
-               args->flags |= XFSMNT_WSYNC;
-       if (silent)
-               args->flags |= XFSMNT_QUIET;
-       args->flags |= XFSMNT_32BITINODES;
-
-       return args;
-}
-
 #define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
 #define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
 #define MNTOPT_LOGDEV  "logdev"        /* log device */
 #define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
 #define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
 #define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_INO64   "ino64"         /* force inodes into 64-bit range */
 #define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
 #define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
 #define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
@@ -139,6 +119,23 @@ xfs_args_allocate(
 #define MNTOPT_XDSM    "xdsm"          /* DMI enabled (DMAPI / XDSM) */
 #define MNTOPT_DMI     "dmi"           /* DMI enabled (DMAPI / XDSM) */
 
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+       Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static const match_table_t tokens = {
+       {Opt_barrier, "barrier"},
+       {Opt_nobarrier, "nobarrier"},
+       {Opt_err, NULL}
+};
+
+
 STATIC unsigned long
 suffix_strtoul(char *s, char **endp, unsigned int base)
 {
@@ -162,26 +159,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
        return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
 }
 
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
 STATIC int
 xfs_parseargs(
        struct xfs_mount        *mp,
        char                    *options,
-       struct xfs_mount_args   *args,
-       int                     update)
+       char                    **mtpt)
 {
+       struct super_block      *sb = mp->m_super;
        char                    *this_char, *value, *eov;
-       int                     dsunit, dswidth, vol_dsunit, vol_dswidth;
-       int                     iosize;
+       int                     dsunit = 0;
+       int                     dswidth = 0;
+       int                     iosize = 0;
        int                     dmapi_implies_ikeep = 1;
+       __uint8_t               iosizelog = 0;
+
+       /*
+        * Copy binary VFS mount flags we are interested in.
+        */
+       if (sb->s_flags & MS_RDONLY)
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       if (sb->s_flags & MS_DIRSYNC)
+               mp->m_flags |= XFS_MOUNT_DIRSYNC;
+       if (sb->s_flags & MS_SYNCHRONOUS)
+               mp->m_flags |= XFS_MOUNT_WSYNC;
+
+       /*
+        * Set some default flags that could be cleared by the mount option
+        * parsing.
+        */
+       mp->m_flags |= XFS_MOUNT_BARRIER;
+       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 
-       args->flags |= XFSMNT_BARRIER;
-       args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+       /*
+        * These can be overridden by the mount option parsing.
+        */
+       mp->m_logbufs = -1;
+       mp->m_logbsize = -1;
 
        if (!options)
                goto done;
 
-       iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
-
        while ((this_char = strsep(&options, ",")) != NULL) {
                if (!*this_char)
                        continue;
@@ -195,7 +220,7 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                       args->logbufs = simple_strtoul(value, &eov, 10);
+                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
                } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -203,7 +228,7 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                       args->logbufsize = suffix_strtoul(value, &eov, 10);
+                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
                } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -211,7 +236,9 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                       strncpy(args->logname, value, MAXNAMELEN);
+                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_logname)
+                               return ENOMEM;
                } else if (!strcmp(this_char, MNTOPT_MTPT)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -219,7 +246,9 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                       strncpy(args->mtpt, value, MAXNAMELEN);
+                       *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!*mtpt)
+                               return ENOMEM;
                } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -227,7 +256,9 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                       strncpy(args->rtname, value, MAXNAMELEN);
+                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_rtname)
+                               return ENOMEM;
                } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -236,8 +267,7 @@ xfs_parseargs(
                                return EINVAL;
                        }
                        iosize = simple_strtoul(value, &eov, 10);
-                       args->flags |= XFSMNT_IOSIZE;
-                       args->iosizelog = (uint8_t) iosize;
+                       iosizelog = ffs(iosize) - 1;
                } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -246,8 +276,7 @@ xfs_parseargs(
                                return EINVAL;
                        }
                        iosize = suffix_strtoul(value, &eov, 10);
-                       args->flags |= XFSMNT_IOSIZE;
-                       args->iosizelog = ffs(iosize) - 1;
+                       iosizelog = ffs(iosize) - 1;
                } else if (!strcmp(this_char, MNTOPT_GRPID) ||
                           !strcmp(this_char, MNTOPT_BSDGROUPS)) {
                        mp->m_flags |= XFS_MOUNT_GRPID;
@@ -255,23 +284,15 @@ xfs_parseargs(
                           !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
                        mp->m_flags &= ~XFS_MOUNT_GRPID;
                } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-                       args->flags |= XFSMNT_WSYNC;
+                       mp->m_flags |= XFS_MOUNT_WSYNC;
                } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
-                       args->flags |= XFSMNT_OSYNCISOSYNC;
+                       mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
                } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-                       args->flags |= XFSMNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_INO64)) {
-                       args->flags |= XFSMNT_INO64;
-#if !XFS_BIG_INUMS
-                       cmn_err(CE_WARN,
-                               "XFS: %s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-#endif
+                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
                } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-                       args->flags |= XFSMNT_NOALIGN;
+                       mp->m_flags |= XFS_MOUNT_NOALIGN;
                } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-                       args->flags |= XFSMNT_SWALLOC;
+                       mp->m_flags |= XFS_MOUNT_SWALLOC;
                } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -289,7 +310,7 @@ xfs_parseargs(
                        }
                        dswidth = simple_strtoul(value, &eov, 10);
                } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-                       args->flags &= ~XFSMNT_32BITINODES;
+                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
                        cmn_err(CE_WARN,
                                "XFS: %s option not allowed on this system",
@@ -297,56 +318,61 @@ xfs_parseargs(
                        return EINVAL;
 #endif
                } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-                       args->flags |= XFSMNT_NOUUID;
+                       mp->m_flags |= XFS_MOUNT_NOUUID;
                } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-                       args->flags |= XFSMNT_BARRIER;
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
                } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-                       args->flags &= ~XFSMNT_BARRIER;
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
                } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-                       args->flags |= XFSMNT_IKEEP;
+                       mp->m_flags |= XFS_MOUNT_IKEEP;
                } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
                        dmapi_implies_ikeep = 0;
-                       args->flags &= ~XFSMNT_IKEEP;
+                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
                } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-                       args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
                } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-                       args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
                } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                       args->flags |= XFSMNT_ATTR2;
+                       mp->m_flags |= XFS_MOUNT_ATTR2;
                } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                       args->flags &= ~XFSMNT_ATTR2;
-                       args->flags |= XFSMNT_NOATTR2;
+                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
+                       mp->m_flags |= XFS_MOUNT_NOATTR2;
                } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-                       args->flags2 |= XFSMNT2_FILESTREAMS;
+                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
                } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-                       args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
-                       args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
                           !strcmp(this_char, MNTOPT_UQUOTA) ||
                           !strcmp(this_char, MNTOPT_USRQUOTA)) {
-                       args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                        XFS_UQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
                           !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-                       args->flags |= XFSMNT_UQUOTA;
-                       args->flags &= ~XFSMNT_UQUOTAENF;
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
                           !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-                       args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-                       args->flags |= XFSMNT_PQUOTA;
-                       args->flags &= ~XFSMNT_PQUOTAENF;
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
                           !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-                       args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-                       args->flags |= XFSMNT_GQUOTA;
-                       args->flags &= ~XFSMNT_GQUOTAENF;
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
-                       args->flags |= XFSMNT_DMAPI;
+                       mp->m_flags |= XFS_MOUNT_DMAPI;
                } else if (!strcmp(this_char, MNTOPT_XDSM)) {
-                       args->flags |= XFSMNT_DMAPI;
+                       mp->m_flags |= XFS_MOUNT_DMAPI;
                } else if (!strcmp(this_char, MNTOPT_DMI)) {
-                       args->flags |= XFSMNT_DMAPI;
+                       mp->m_flags |= XFS_MOUNT_DMAPI;
                } else if (!strcmp(this_char, "ihashsize")) {
                        cmn_err(CE_WARN,
        "XFS: ihashsize no longer used, option is deprecated.");
@@ -364,27 +390,37 @@ xfs_parseargs(
                }
        }
 
-       if (args->flags & XFSMNT_NORECOVERY) {
-               if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
-                       cmn_err(CE_WARN,
-                               "XFS: no-recovery mounts must be read-only.");
-                       return EINVAL;
-               }
+       /*
+        * no recovery flag requires a read-only mount
+        */
+       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
+               return EINVAL;
        }
 
-       if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
                cmn_err(CE_WARN,
        "XFS: sunit and swidth options incompatible with the noalign option");
                return EINVAL;
        }
 
-       if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+#ifndef CONFIG_XFS_QUOTA
+       if (XFS_IS_QUOTA_RUNNING(mp)) {
+               cmn_err(CE_WARN,
+                       "XFS: quota support not available in this kernel.");
+               return EINVAL;
+       }
+#endif
+
+       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
                cmn_err(CE_WARN,
                        "XFS: cannot mount with both project and group quota");
                return EINVAL;
        }
 
-       if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+       if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
                printk("XFS: %s option needs the mount point option as well\n",
                        MNTOPT_DMAPI);
                return EINVAL;
@@ -412,27 +448,66 @@ xfs_parseargs(
         * Note that if "ikeep" or "noikeep" mount options are
         * supplied, then they are honored.
         */
-       if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
-               args->flags |= XFSMNT_IKEEP;
+       if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
+               mp->m_flags |= XFS_MOUNT_IKEEP;
 
-       if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+done:
+       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+               /*
+                * At this point the superblock has not been read
+                * in, therefore we do not know the block size.
+                * Before the mount call ends we will convert
+                * these to FSBs.
+                */
                if (dsunit) {
-                       args->sunit = dsunit;
-                       args->flags |= XFSMNT_RETERR;
-               } else {
-                       args->sunit = vol_dsunit;
+                       mp->m_dalign = dsunit;
+                       mp->m_flags |= XFS_MOUNT_RETERR;
                }
-               dswidth ? (args->swidth = dswidth) :
-                         (args->swidth = vol_dswidth);
-       } else {
-               args->sunit = args->swidth = 0;
+
+               if (dswidth)
+                       mp->m_swidth = dswidth;
+       }
+
+       if (mp->m_logbufs != -1 &&
+           mp->m_logbufs != 0 &&
+           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+               cmn_err(CE_WARN,
+                       "XFS: invalid logbufs value: %d [not %d-%d]",
+                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+               return XFS_ERROR(EINVAL);
+       }
+       if (mp->m_logbsize != -1 &&
+           mp->m_logbsize !=  0 &&
+           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+            !is_power_of_2(mp->m_logbsize))) {
+               cmn_err(CE_WARN,
+       "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+                       mp->m_logbsize);
+               return XFS_ERROR(EINVAL);
+       }
+
+       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+       if (!mp->m_fsname)
+               return ENOMEM;
+       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+       if (iosizelog) {
+               if (iosizelog > XFS_MAX_IO_LOG ||
+                   iosizelog < XFS_MIN_IO_LOG) {
+                       cmn_err(CE_WARN,
+               "XFS: invalid log iosize: %d [not %d-%d]",
+                               iosizelog, XFS_MIN_IO_LOG,
+                               XFS_MAX_IO_LOG);
+                       return XFS_ERROR(EINVAL);
+               }
+
+               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+               mp->m_readio_log = iosizelog;
+               mp->m_writeio_log = iosizelog;
        }
 
-done:
-       if (args->flags & XFSMNT_32BITINODES)
-               mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-       if (args->flags2)
-               args->flags |= XFSMNT_FLAGS2;
        return 0;
 }
 
@@ -450,7 +525,6 @@ xfs_showargs(
                /* the few simple ones we can get from the mount struct */
                { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
                { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_INO64,              "," MNTOPT_INO64 },
                { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
                { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
                { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
@@ -506,15 +580,19 @@ xfs_showargs(
        else if (mp->m_qflags & XFS_UQUOTA_ACCT)
                seq_puts(m, "," MNTOPT_UQUOTANOENF);
 
-       if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_PRJQUOTA);
-       else if (mp->m_qflags & XFS_PQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_PQUOTANOENF);
-
-       if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_GRPQUOTA);
-       else if (mp->m_qflags & XFS_GQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       /* Either project or group quotas can be active, not both */
+
+       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       }
 
        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
                seq_puts(m, "," MNTOPT_NOQUOTA);
@@ -543,7 +621,7 @@ xfs_max_file_offset(
         */
 
 #if BITS_PER_LONG == 32
-# if defined(CONFIG_LBD)
+# if defined(CONFIG_LBDAF)
        ASSERT(sizeof(sector_t) == 8);
        pagefactor = PAGE_CACHE_SIZE;
        bitshift = BITS_PER_LONG;
@@ -555,116 +633,7 @@ xfs_max_file_offset(
        return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
 
-STATIC_INLINE void
-xfs_set_inodeops(
-       struct inode            *inode)
-{
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFREG:
-               inode->i_op = &xfs_inode_operations;
-               inode->i_fop = &xfs_file_operations;
-               inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       case S_IFDIR:
-               inode->i_op = &xfs_dir_inode_operations;
-               inode->i_fop = &xfs_dir_file_operations;
-               break;
-       case S_IFLNK:
-               inode->i_op = &xfs_symlink_inode_operations;
-               if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
-                       inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       default:
-               inode->i_op = &xfs_inode_operations;
-               init_special_inode(inode, inode->i_mode, inode->i_rdev);
-               break;
-       }
-}
-
-STATIC_INLINE void
-xfs_revalidate_inode(
-       xfs_mount_t             *mp,
-       bhv_vnode_t             *vp,
-       xfs_inode_t             *ip)
-{
-       struct inode            *inode = vn_to_inode(vp);
-
-       inode->i_mode   = ip->i_d.di_mode;
-       inode->i_nlink  = ip->i_d.di_nlink;
-       inode->i_uid    = ip->i_d.di_uid;
-       inode->i_gid    = ip->i_d.di_gid;
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               inode->i_rdev =
-                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                             sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               inode->i_rdev = 0;
-               break;
-       }
-
-       inode->i_generation = ip->i_d.di_gen;
-       i_size_write(inode, ip->i_d.di_size);
-       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
-       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
-       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
-       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
-       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
-       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
-       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-       xfs_iflags_clear(ip, XFS_IMODIFIED);
-}
-
-void
-xfs_initialize_vnode(
-       struct xfs_mount        *mp,
-       bhv_vnode_t             *vp,
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = vn_to_inode(vp);
-
-       if (!ip->i_vnode) {
-               ip->i_vnode = vp;
-               inode->i_private = ip;
-       }
-
-       /*
-        * We need to set the ops vectors, and unlock the inode, but if
-        * we have been called during the new inode create process, it is
-        * too early to fill in the Linux inode.  We will get called a
-        * second time once the inode is properly set up, and then we can
-        * finish our work.
-        */
-       if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) {
-               xfs_revalidate_inode(mp, vp, ip);
-               xfs_set_inodeops(inode);
-
-               xfs_iflags_clear(ip, XFS_INEW);
-               barrier();
-
-               unlock_new_inode(inode);
-       }
-}
-
-int
+STATIC int
 xfs_blkdev_get(
        xfs_mount_t             *mp,
        const char              *name,
@@ -672,7 +641,7 @@ xfs_blkdev_get(
 {
        int                     error = 0;
 
-       *bdevp = open_bdev_excl(name, 0, mp);
+       *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
        if (IS_ERR(*bdevp)) {
                error = PTR_ERR(*bdevp);
                printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@ -681,12 +650,12 @@ xfs_blkdev_get(
        return -error;
 }
 
-void
+STATIC void
 xfs_blkdev_put(
        struct block_device     *bdev)
 {
        if (bdev)
-               close_bdev_excl(bdev);
+               close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
 }
 
 /*
@@ -723,7 +692,7 @@ xfs_barrier_test(
        return error;
 }
 
-void
+STATIC void
 xfs_mountfs_check_barriers(xfs_mount_t *mp)
 {
        int error;
@@ -735,14 +704,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                return;
        }
 
-       if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
-                                       QUEUE_ORDERED_NONE) {
-               xfs_fs_cmn_err(CE_NOTE, mp,
-                 "Disabling barriers, not supported by the underlying device");
-               mp->m_flags &= ~XFS_MOUNT_BARRIER;
-               return;
-       }
-
        if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, underlying device is readonly");
@@ -766,39 +727,170 @@ xfs_blkdev_issue_flush(
        blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
 
+STATIC void
+xfs_close_devices(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_logdev_targp);
+               xfs_blkdev_put(logdev);
+       }
+       if (mp->m_rtdev_targp) {
+               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+               xfs_blkdev_put(rtdev);
+       }
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ *     (1) device (partition) with data and internal log
+ *     (2) logical volume with data and log subvolumes.
+ *     (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+       struct xfs_mount        *mp)
+{
+       struct block_device     *ddev = mp->m_super->s_bdev;
+       struct block_device     *logdev = NULL, *rtdev = NULL;
+       int                     error;
+
+       /*
+        * Open real time and log devices - order is important.
+        */
+       if (mp->m_logname) {
+               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
+               if (error)
+                       goto out;
+       }
+
+       if (mp->m_rtname) {
+               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
+               if (error)
+                       goto out_close_logdev;
+
+               if (rtdev == ddev || rtdev == logdev) {
+                       cmn_err(CE_WARN,
+       "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+                       error = EINVAL;
+                       goto out_close_rtdev;
+               }
+       }
+
+       /*
+        * Setup xfs_mount buffer target pointers
+        */
+       error = ENOMEM;
+       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+       if (!mp->m_ddev_targp)
+               goto out_close_rtdev;
+
+       if (rtdev) {
+               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+               if (!mp->m_rtdev_targp)
+                       goto out_free_ddev_targ;
+       }
+
+       if (logdev && logdev != ddev) {
+               mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+               if (!mp->m_logdev_targp)
+                       goto out_free_rtdev_targ;
+       } else {
+               mp->m_logdev_targp = mp->m_ddev_targp;
+       }
+
+       return 0;
+
+ out_free_rtdev_targ:
+       if (mp->m_rtdev_targp)
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+ out_free_ddev_targ:
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+ out_close_rtdev:
+       if (rtdev)
+               xfs_blkdev_put(rtdev);
+ out_close_logdev:
+       if (logdev && logdev != ddev)
+               xfs_blkdev_put(logdev);
+ out:
+       return error;
+}
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+       struct xfs_mount        *mp)
+{
+       int                     error;
+
+       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+                                   mp->m_sb.sb_sectsize);
+       if (error)
+               return error;
+
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               unsigned int    log_sector_size = BBSIZE;
+
+               if (xfs_sb_version_hassector(&mp->m_sb))
+                       log_sector_size = mp->m_sb.sb_logsectsize;
+               error = xfs_setsize_buftarg(mp->m_logdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           log_sector_size);
+               if (error)
+                       return error;
+       }
+       if (mp->m_rtdev_targp) {
+               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           mp->m_sb.sb_sectsize);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
 /*
  * XFS AIL push thread support
  */
 void
 xfsaild_wakeup(
-       xfs_mount_t             *mp,
+       struct xfs_ail          *ailp,
        xfs_lsn_t               threshold_lsn)
 {
-       mp->m_ail.xa_target = threshold_lsn;
-       wake_up_process(mp->m_ail.xa_task);
+       ailp->xa_target = threshold_lsn;
+       wake_up_process(ailp->xa_task);
 }
 
-int
+STATIC int
 xfsaild(
        void    *data)
 {
-       xfs_mount_t     *mp = (xfs_mount_t *)data;
+       struct xfs_ail  *ailp = data;
        xfs_lsn_t       last_pushed_lsn = 0;
-       long            tout = 0;
+       long            tout = 0; /* milliseconds */
 
        while (!kthread_should_stop()) {
-               if (tout)
-                       schedule_timeout_interruptible(msecs_to_jiffies(tout));
-               tout = 1000;
+               schedule_timeout_interruptible(tout ?
+                               msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
 
                /* swsusp */
                try_to_freeze();
 
-               ASSERT(mp->m_log);
-               if (XFS_FORCED_SHUTDOWN(mp))
+               ASSERT(ailp->xa_mount->m_log);
+               if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
                        continue;
 
-               tout = xfsaild_push(mp, &last_pushed_lsn);
+               tout = xfsaild_push(ailp, &last_pushed_lsn);
        }
 
        return 0;
@@ -806,84 +898,127 @@ xfsaild(
 
 int
 xfsaild_start(
-       xfs_mount_t     *mp)
+       struct xfs_ail  *ailp)
 {
-       mp->m_ail.xa_target = 0;
-       mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
-       if (IS_ERR(mp->m_ail.xa_task))
-               return -PTR_ERR(mp->m_ail.xa_task);
+       ailp->xa_target = 0;
+       ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+       if (IS_ERR(ailp->xa_task))
+               return -PTR_ERR(ailp->xa_task);
        return 0;
 }
 
 void
 xfsaild_stop(
-       xfs_mount_t     *mp)
+       struct xfs_ail  *ailp)
 {
-       kthread_stop(mp->m_ail.xa_task);
+       kthread_stop(ailp->xa_task);
 }
 
 
-
+/* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
        struct super_block      *sb)
 {
-       bhv_vnode_t             *vp;
-
-       vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
-       if (unlikely(!vp))
-               return NULL;
-       return vn_to_inode(vp);
+       BUG();
+       return NULL;
 }
 
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
 STATIC void
 xfs_fs_destroy_inode(
        struct inode            *inode)
 {
-       kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode));
-}
+       struct xfs_inode        *ip = XFS_I(inode);
 
-STATIC void
-xfs_fs_inode_init_once(
-       void                    *vnode)
-{
-       inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
-}
+       xfs_itrace_entry(ip);
 
-STATIC int __init
-xfs_init_zones(void)
-{
-       xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
-                                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-                                       KM_ZONE_SPREAD,
-                                       xfs_fs_inode_init_once);
-       if (!xfs_vnode_zone)
-               goto out;
+       XFS_STATS_INC(vn_reclaim);
 
-       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-       if (!xfs_ioend_zone)
-               goto out_destroy_vnode_zone;
+       /* bad inode, get out here ASAP */
+       if (is_bad_inode(inode))
+               goto out_reclaim;
 
-       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                 xfs_ioend_zone);
-       if (!xfs_ioend_pool)
-               goto out_free_ioend_zone;
-       return 0;
+       xfs_ioend_wait(ip);
 
- out_free_ioend_zone:
-       kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
-       kmem_zone_destroy(xfs_vnode_zone);
- out:
-       return -ENOMEM;
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+       /*
+        * We should never get here with one of the reclaim flags already set.
+        */
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+       /*
+        * We always use background reclaim here because even if the
+        * inode is clean, it still may be under IO and hence we have
+        * to take the flush lock. The background reclaim path handles
+        * this more efficiently than we can here, so simply let background
+        * reclaim tear down all inodes.
+        */
+out_reclaim:
+       xfs_inode_set_reclaim_tag(ip);
 }
 
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly intialise
+ * fields in the xfs inode that left in the initialise state
+ * when freeing the inode.
+ */
 STATIC void
-xfs_destroy_zones(void)
+xfs_fs_inode_init_once(
+       void                    *inode)
 {
-       mempool_destroy(xfs_ioend_pool);
-       kmem_zone_destroy(xfs_vnode_zone);
-       kmem_zone_destroy(xfs_ioend_zone);
+       struct xfs_inode        *ip = inode;
+
+       memset(ip, 0, sizeof(struct xfs_inode));
+
+       /* vfs inode */
+       inode_init_once(VFS_I(ip));
+
+       /* xfs inode */
+       atomic_set(&ip->i_iocount, 0);
+       atomic_set(&ip->i_pincount, 0);
+       spin_lock_init(&ip->i_flags_lock);
+       init_waitqueue_head(&ip->i_ipin_wait);
+       /*
+        * Because we want to use a counting completion, complete
+        * the flush completion once to allow a single access to
+        * the flush completion without blocking.
+        */
+       init_completion(&ip->i_flush);
+       complete(&ip->i_flush);
+
+       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
+}
+
+/*
+ * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
+ * we catch unlogged VFS level updates to the inode. Care must be taken
+ * here - the transaction code calls mark_inode_dirty_sync() to mark the
+ * VFS inode dirty in a transaction and clears the i_update_core field;
+ * it must clear the field after calling mark_inode_dirty_sync() to
+ * correctly indicate that the dirty state has been propagated into the
+ * inode log item.
+ *
+ * We need the barrier() to maintain correct ordering between unlogged
+ * updates and the transaction commit code that clears the i_update_core
+ * field. This requires all updates to be completed before marking the
+ * inode dirty.
+ */
+STATIC void
+xfs_fs_dirty_inode(
+       struct inode    *inode)
+{
+       barrier();
+       XFS_I(inode)->i_update_core = 1;
 }
 
 /*
@@ -897,22 +1032,58 @@ xfs_fs_write_inode(
        struct inode            *inode,
        int                     sync)
 {
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
        int                     error = 0;
-       int                     flags = 0;
 
-       xfs_itrace_entry(XFS_I(inode));
+       xfs_itrace_entry(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       if (sync) {
+               error = xfs_wait_on_pages(ip, 0, -1);
+               if (error)
+                       goto out;
+       }
+
+       /*
+        * Bypass inodes which have already been cleaned by
+        * the inode flush clustering code inside xfs_iflush
+        */
+       if (xfs_inode_clean(ip))
+               goto out;
+
+       /*
+        * We make this non-blocking if the inode is contended, return
+        * EAGAIN to indicate to the caller that they did not succeed.
+        * This prevents the flush path from blocking on inodes inside
+        * another operation right now, they get caught later by xfs_sync.
+        */
        if (sync) {
-               filemap_fdatawait(inode->i_mapping);
-               flags |= FLUSH_SYNC;
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               xfs_iflock(ip);
+
+               error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+       } else {
+               error = EAGAIN;
+               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+                       goto out;
+               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+                       goto out_unlock;
+
+               error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
        }
-       error = xfs_inode_flush(XFS_I(inode), flags);
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
        /*
         * if we failed to write out the inode then mark
         * it dirty again so we'll try again later.
         */
        if (error)
-               mark_inode_dirty_sync(inode);
-
+               xfs_mark_inode_dirty_sync(ip);
        return -error;
 }
 
@@ -922,164 +1093,35 @@ xfs_fs_clear_inode(
 {
        xfs_inode_t             *ip = XFS_I(inode);
 
+       xfs_itrace_entry(ip);
+       XFS_STATS_INC(vn_rele);
+       XFS_STATS_INC(vn_remove);
+       XFS_STATS_DEC(vn_active);
+
        /*
-        * ip can be null when xfs_iget_core calls xfs_idestroy if we
-        * find an inode with di_mode == 0 but without IGET_CREATE set.
+        * The iolock is used by the file system to coordinate reads,
+        * writes, and block truncates.  Up to this point the lock
+        * protected concurrent accesses by users of the inode.  But
+        * from here forward we're doing some final processing of the
+        * inode because we're done with it, and although we reuse the
+        * iolock for protection it is really a distinct lock class
+        * (in the lockdep sense) from before.  To keep lockdep happy
+        * (and basically indicate what we are doing), we explicitly
+        * re-init the iolock here.
         */
-       if (ip) {
-               xfs_itrace_entry(ip);
-               XFS_STATS_INC(vn_rele);
-               XFS_STATS_INC(vn_remove);
-               XFS_STATS_INC(vn_reclaim);
-               XFS_STATS_DEC(vn_active);
-
-               xfs_inactive(ip);
-               xfs_iflags_clear(ip, XFS_IMODIFIED);
-               if (xfs_reclaim(ip))
-                       panic("%s: cannot reclaim 0x%p\n", __func__, inode);
-       }
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 
-       ASSERT(XFS_I(inode) == NULL);
+       xfs_inactive(ip);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
 STATIC void
-xfs_syncd_queue_work(
-       struct xfs_mount *mp,
-       void            *data,
-       void            (*syncer)(struct xfs_mount *, void *))
-{
-       struct bhv_vfs_sync_work *work;
-
-       work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
-       INIT_LIST_HEAD(&work->w_list);
-       work->w_syncer = syncer;
-       work->w_data = data;
-       work->w_mount = mp;
-       spin_lock(&mp->m_sync_lock);
-       list_add_tail(&work->w_list, &mp->m_sync_list);
-       spin_unlock(&mp->m_sync_lock);
-       wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inode_work(
-       struct xfs_mount *mp,
-       void            *arg)
-{
-       struct inode    *inode = arg;
-       filemap_flush(inode->i_mapping);
-       iput(inode);
-}
-
-void
-xfs_flush_inode(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = ip->i_vnode;
-
-       igrab(inode);
-       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-       delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
-       struct xfs_mount *mp,
-       void            *arg)
-{
-       struct inode    *inode = arg;
-       sync_blockdev(mp->m_super->s_bdev);
-       iput(inode);
-}
-
-void
-xfs_flush_device(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = vn_to_inode(XFS_ITOV(ip));
-
-       igrab(inode);
-       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
-       delay(msecs_to_jiffies(500));
-       xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
-STATIC void
-xfs_sync_worker(
-       struct xfs_mount *mp,
-       void            *unused)
-{
-       int             error;
-
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY))
-               error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
-       mp->m_sync_seq++;
-       wake_up(&mp->m_wait_single_sync_task);
-}
-
-STATIC int
-xfssyncd(
-       void                    *arg)
+xfs_free_fsname(
+       struct xfs_mount        *mp)
 {
-       struct xfs_mount        *mp = arg;
-       long                    timeleft;
-       bhv_vfs_sync_work_t     *work, *n;
-       LIST_HEAD               (tmp);
-
-       set_freezable();
-       timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-       for (;;) {
-               timeleft = schedule_timeout_interruptible(timeleft);
-               /* swsusp */
-               try_to_freeze();
-               if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-                       break;
-
-               spin_lock(&mp->m_sync_lock);
-               /*
-                * We can get woken by laptop mode, to do a sync -
-                * that's the (only!) case where the list would be
-                * empty with time remaining.
-                */
-               if (!timeleft || list_empty(&mp->m_sync_list)) {
-                       if (!timeleft)
-                               timeleft = xfs_syncd_centisecs *
-                                                       msecs_to_jiffies(10);
-                       INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-                       list_add_tail(&mp->m_sync_work.w_list,
-                                       &mp->m_sync_list);
-               }
-               list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
-                       list_move(&work->w_list, &tmp);
-               spin_unlock(&mp->m_sync_lock);
-
-               list_for_each_entry_safe(work, n, &tmp, w_list) {
-                       (*work->w_syncer)(mp, work->w_data);
-                       list_del(&work->w_list);
-                       if (work == &mp->m_sync_work)
-                               continue;
-                       kmem_free(work);
-               }
-       }
-
-       return 0;
+       kfree(mp->m_fsname);
+       kfree(mp->m_rtname);
+       kfree(mp->m_logname);
 }
 
 STATIC void
@@ -1087,28 +1129,22 @@ xfs_fs_put_super(
        struct super_block      *sb)
 {
        struct xfs_mount        *mp = XFS_M(sb);
-       struct xfs_inode        *rip = mp->m_rootip;
-       int                     unmount_event_flags = 0;
-       int                     error;
-
-       kthread_stop(mp->m_sync_task);
 
-       xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
+       xfs_syncd_stop(mp);
 
-#ifdef HAVE_DMAPI
-       if (mp->m_flags & XFS_MOUNT_DMAPI) {
-               unmount_event_flags =
-                       (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
-                               0 : DM_FLAGS_UNWANTED;
+       if (!(sb->s_flags & MS_RDONLY)) {
                /*
-                * Ignore error from dmapi here, first unmount is not allowed
-                * to fail anyway, and second we wouldn't want to fail a
-                * unmount because of dmapi.
+                * XXX(hch): this should be SYNC_WAIT.
+                *
+                * Or more likely not needed at all because the VFS is already
+                * calling ->sync_fs after shutting down all filestem
+                * operations and just before calling ->put_super.
                 */
-               XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
-                               NULL, NULL, 0, 0, unmount_event_flags);
+               xfs_sync_data(mp, 0);
+               xfs_sync_attr(mp, 0);
        }
-#endif
+
+       XFS_SEND_PREUNMOUNT(mp);
 
        /*
         * Blow away any referenced inode in the filestreams cache.
@@ -1118,79 +1154,44 @@ xfs_fs_put_super(
        xfs_filestream_unmount(mp);
 
        XFS_bflush(mp->m_ddev_targp);
-       error = xfs_unmount_flush(mp, 0);
-       WARN_ON(error);
-
-       IRELE(rip);
 
-       /*
-        * If we're forcing a shutdown, typically because of a media error,
-        * we want to make sure we invalidate dirty pages that belong to
-        * referenced vnodes as well.
-        */
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
-               ASSERT(error != EFSCORRUPTED);
-       }
-
-       if (mp->m_flags & XFS_MOUNT_DMAPI) {
-               XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
-                               unmount_event_flags);
-       }
+       XFS_SEND_UNMOUNT(mp);
 
-       xfs_unmountfs(mp, NULL);
-       xfs_qmops_put(mp);
+       xfs_unmountfs(mp);
+       xfs_freesb(mp);
+       xfs_icsb_destroy_counters(mp);
+       xfs_close_devices(mp);
        xfs_dmops_put(mp);
-       kmem_free(mp);
-}
-
-STATIC void
-xfs_fs_write_super(
-       struct super_block      *sb)
-{
-       if (!(sb->s_flags & MS_RDONLY))
-               xfs_sync(XFS_M(sb), SYNC_FSDATA);
-       sb->s_dirt = 0;
+       xfs_free_fsname(mp);
+       kfree(mp);
 }
 
 STATIC int
-xfs_fs_sync_super(
+xfs_fs_sync_fs(
        struct super_block      *sb,
        int                     wait)
 {
        struct xfs_mount        *mp = XFS_M(sb);
        int                     error;
-       int                     flags;
 
        /*
-        * Treat a sync operation like a freeze.  This is to work
-        * around a race in sync_inodes() which works in two phases
-        * - an asynchronous flush, which can write out an inode
-        * without waiting for file size updates to complete, and a
-        * synchronous flush, which wont do anything because the
-        * async flush removed the inode's dirty flag.  Also
-        * sync_inodes() will not see any files that just have
-        * outstanding transactions to be flushed because we don't
-        * dirty the Linux inode until after the transaction I/O
-        * completes.
+        * Not much we can do for the first async pass.  Writing out the
+        * superblock would be counter-productive as we are going to redirty
+        * when writing out other data and metadata (and writing out a single
+        * block is quite fast anyway).
+        *
+        * Try to asynchronously kick off quota syncing at least.
         */
-       if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
-               /*
-                * First stage of freeze - no more writers will make progress
-                * now we are here, so we flush delwri and delalloc buffers
-                * here, then wait for all I/O to complete.  Data is frozen at
-                * that point. Metadata is not frozen, transactions can still
-                * occur here so don't bother flushing the buftarg (i.e
-                * SYNC_QUIESCE) because it'll just get dirty again.
-                */
-               flags = SYNC_DATA_QUIESCE;
-       } else
-               flags = SYNC_FSDATA;
+       if (!wait) {
+               xfs_qm_sync(mp, SYNC_TRYLOCK);
+               return 0;
+       }
 
-       error = xfs_sync(mp, flags);
-       sb->s_dirt = 0;
+       error = xfs_quiesce_data(mp);
+       if (error)
+               return -error;
 
-       if (unlikely(laptop_mode)) {
+       if (laptop_mode) {
                int     prev_sync_seq = mp->m_sync_seq;
 
                /*
@@ -1209,7 +1210,7 @@ xfs_fs_sync_super(
                                mp->m_sync_seq != prev_sync_seq);
        }
 
-       return -error;
+       return 0;
 }
 
 STATIC int
@@ -1219,6 +1220,7 @@ xfs_fs_statfs(
 {
        struct xfs_mount        *mp = XFS_M(dentry->d_sb);
        xfs_sb_t                *sbp = &mp->m_sb;
+       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
        __uint64_t              fakeinos, id;
        xfs_extlen_t            lsize;
 
@@ -1238,22 +1240,19 @@ xfs_fs_statfs(
        statp->f_bfree = statp->f_bavail =
                                sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
        fakeinos = statp->f_bfree << sbp->sb_inopblog;
-#if XFS_BIG_INUMS
-       fakeinos += mp->m_inoadd;
-#endif
        statp->f_files =
            MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
        if (mp->m_maxicount)
-#if XFS_BIG_INUMS
-               if (!mp->m_inoadd)
-#endif
-                       statp->f_files = min_t(typeof(statp->f_files),
-                                               statp->f_files,
-                                               mp->m_maxicount);
+               statp->f_files = min_t(typeof(statp->f_files),
+                                       statp->f_files,
+                                       mp->m_maxicount);
        statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
        spin_unlock(&mp->m_sb_lock);
 
-       XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+               xfs_qm_statvfs(ip, statp);
        return 0;
 }
 
@@ -1264,47 +1263,103 @@ xfs_fs_remount(
        char                    *options)
 {
        struct xfs_mount        *mp = XFS_M(sb);
-       struct xfs_mount_args   *args = xfs_args_allocate(sb, 0);
+       substring_t             args[MAX_OPT_ARGS];
+       char                    *p;
        int                     error;
 
-       error = xfs_parseargs(mp, options, args, 1);
-       if (error)
-               goto out_free_args;
+       while ((p = strsep(&options, ",")) != NULL) {
+               int token;
 
-       if (!(*flags & MS_RDONLY)) {                    /* rw/ro -> rw */
-               if (mp->m_flags & XFS_MOUNT_RDONLY)
-               mp->m_flags &= ~XFS_MOUNT_RDONLY;
-               if (args->flags & XFSMNT_BARRIER) {
+               if (!*p)
+                       continue;
+
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_barrier:
                        mp->m_flags |= XFS_MOUNT_BARRIER;
-                       xfs_mountfs_check_barriers(mp);
-               } else {
+
+                       /*
+                        * Test if barriers are actually working if we can,
+                        * else delay this check until the filesystem is
+                        * marked writeable.
+                        */
+                       if (!(mp->m_flags & XFS_MOUNT_RDONLY))
+                               xfs_mountfs_check_barriers(mp);
+                       break;
+               case Opt_nobarrier:
                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                       break;
+               default:
+                       /*
+                        * Logically we would return an error here to prevent
+                        * users from believing they might have changed
+                        * mount options using remount which can't be changed.
+                        *
+                        * But unfortunately mount(8) adds all options from
+                        * mtab and fstab to the mount arguments in some cases
+                        * so we can't blindly reject options, but have to
+                        * check for each specified option if it actually
+                        * differs from the currently set option and only
+                        * reject it if that's the case.
+                        *
+                        * Until that is implemented we return success for
+                        * every remount request, and silently ignore all
+                        * options that we can't actually change.
+                        */
+#if 0
+                       printk(KERN_INFO
+       "XFS: mount option \"%s\" not supported for remount\n", p);
+                       return -EINVAL;
+#else
+                       break;
+#endif
+               }
+       }
+
+       /* ro -> rw */
+       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+               mp->m_flags &= ~XFS_MOUNT_RDONLY;
+               if (mp->m_flags & XFS_MOUNT_BARRIER)
+                       xfs_mountfs_check_barriers(mp);
+
+               /*
+                * If this is the first remount to writeable state we
+                * might have some superblock changes to update.
+                */
+               if (mp->m_update_flags) {
+                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
+                       if (error) {
+                               cmn_err(CE_WARN,
+                                       "XFS: failed to write sb changes");
+                               return error;
+                       }
+                       mp->m_update_flags = 0;
                }
-       } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
-               xfs_filestream_flush(mp);
-               xfs_sync(mp, SYNC_DATA_QUIESCE);
-               xfs_attr_quiesce(mp);
+       }
+
+       /* rw -> ro */
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+               xfs_quiesce_data(mp);
+               xfs_quiesce_attr(mp);
                mp->m_flags |= XFS_MOUNT_RDONLY;
        }
 
- out_free_args:
-       kmem_free(args);
-       return -error;
+       return 0;
 }
 
 /*
  * Second stage of a freeze. The data is already frozen so we only
- * need to take care of themetadata. Once that's done write a dummy
+ * need to take care of the metadata. Once that's done write a dummy
  * record to dirty the log in case of a crash while frozen.
  */
-STATIC void
-xfs_fs_lockfs(
+STATIC int
+xfs_fs_freeze(
        struct super_block      *sb)
 {
        struct xfs_mount        *mp = XFS_M(sb);
 
-       xfs_attr_quiesce(mp);
-       xfs_fs_log_dummy(mp);
+       xfs_quiesce_attr(mp);
+       return -xfs_fs_log_dummy(mp);
 }
 
 STATIC int
@@ -1315,208 +1370,30 @@ xfs_fs_show_options(
        return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
 }
 
-STATIC int
-xfs_fs_quotasync(
-       struct super_block      *sb,
-       int                     type)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XQUOTASYNC, 0, NULL);
-}
-
-STATIC int
-xfs_fs_getxstate(
-       struct super_block      *sb,
-       struct fs_quota_stat    *fqs)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XGETQSTAT, 0, (caddr_t)fqs);
-}
-
-STATIC int
-xfs_fs_setxstate(
-       struct super_block      *sb,
-       unsigned int            flags,
-       int                     op)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb), op, 0, (caddr_t)&flags);
-}
-
-STATIC int
-xfs_fs_getxquota(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb),
-                                (type == USRQUOTA) ? Q_XGETQUOTA :
-                                 ((type == GRPQUOTA) ? Q_XGETGQUOTA :
-                                  Q_XGETPQUOTA), id, (caddr_t)fdq);
-}
-
-STATIC int
-xfs_fs_setxquota(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       return -XFS_QM_QUOTACTL(XFS_M(sb),
-                                (type == USRQUOTA) ? Q_XSETQLIM :
-                                 ((type == GRPQUOTA) ? Q_XSETGQLIM :
-                                  Q_XSETPQLIM), id, (caddr_t)fdq);
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
-       struct xfs_mount_args   *ap,
-       struct xfs_mount        *mp)
-{
-       /* Values are in BBs */
-       if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
-               /*
-                * At this point the superblock has not been read
-                * in, therefore we do not know the block size.
-                * Before the mount call ends we will convert
-                * these to FSBs.
-                */
-               mp->m_dalign = ap->sunit;
-               mp->m_swidth = ap->swidth;
-       }
-
-       if (ap->logbufs != -1 &&
-           ap->logbufs != 0 &&
-           (ap->logbufs < XLOG_MIN_ICLOGS ||
-            ap->logbufs > XLOG_MAX_ICLOGS)) {
-               cmn_err(CE_WARN,
-                       "XFS: invalid logbufs value: %d [not %d-%d]",
-                       ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-               return XFS_ERROR(EINVAL);
-       }
-       mp->m_logbufs = ap->logbufs;
-       if (ap->logbufsize != -1 &&
-           ap->logbufsize !=  0 &&
-           (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
-            ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
-            !is_power_of_2(ap->logbufsize))) {
-               cmn_err(CE_WARN,
-       "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-                       ap->logbufsize);
-               return XFS_ERROR(EINVAL);
-       }
-       mp->m_logbsize = ap->logbufsize;
-       mp->m_fsname_len = strlen(ap->fsname) + 1;
-       mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
-       strcpy(mp->m_fsname, ap->fsname);
-       if (ap->rtname[0]) {
-               mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
-               strcpy(mp->m_rtname, ap->rtname);
-       }
-       if (ap->logname[0]) {
-               mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
-               strcpy(mp->m_logname, ap->logname);
-       }
-
-       if (ap->flags & XFSMNT_WSYNC)
-               mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
-       if (ap->flags & XFSMNT_INO64) {
-               mp->m_flags |= XFS_MOUNT_INO64;
-               mp->m_inoadd = XFS_INO64_OFFSET;
-       }
-#endif
-       if (ap->flags & XFSMNT_RETERR)
-               mp->m_flags |= XFS_MOUNT_RETERR;
-       if (ap->flags & XFSMNT_NOALIGN)
-               mp->m_flags |= XFS_MOUNT_NOALIGN;
-       if (ap->flags & XFSMNT_SWALLOC)
-               mp->m_flags |= XFS_MOUNT_SWALLOC;
-       if (ap->flags & XFSMNT_OSYNCISOSYNC)
-               mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
-       if (ap->flags & XFSMNT_32BITINODES)
-               mp->m_flags |= XFS_MOUNT_32BITINODES;
-
-       if (ap->flags & XFSMNT_IOSIZE) {
-               if (ap->iosizelog > XFS_MAX_IO_LOG ||
-                   ap->iosizelog < XFS_MIN_IO_LOG) {
-                       cmn_err(CE_WARN,
-               "XFS: invalid log iosize: %d [not %d-%d]",
-                               ap->iosizelog, XFS_MIN_IO_LOG,
-                               XFS_MAX_IO_LOG);
-                       return XFS_ERROR(EINVAL);
-               }
-
-               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-               mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
-       }
-
-       if (ap->flags & XFSMNT_IKEEP)
-               mp->m_flags |= XFS_MOUNT_IKEEP;
-       if (ap->flags & XFSMNT_DIRSYNC)
-               mp->m_flags |= XFS_MOUNT_DIRSYNC;
-       if (ap->flags & XFSMNT_ATTR2)
-               mp->m_flags |= XFS_MOUNT_ATTR2;
-       if (ap->flags & XFSMNT_NOATTR2)
-               mp->m_flags |= XFS_MOUNT_NOATTR2;
-
-       if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
-               mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
-       /*
-        * no recovery flag requires a read-only mount
-        */
-       if (ap->flags & XFSMNT_NORECOVERY) {
-               if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-                       cmn_err(CE_WARN,
-       "XFS: tried to mount a FS read-write without recovery!");
-                       return XFS_ERROR(EINVAL);
-               }
-               mp->m_flags |= XFS_MOUNT_NORECOVERY;
-       }
-
-       if (ap->flags & XFSMNT_NOUUID)
-               mp->m_flags |= XFS_MOUNT_NOUUID;
-       if (ap->flags & XFSMNT_BARRIER)
-               mp->m_flags |= XFS_MOUNT_BARRIER;
-       else
-               mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
-       if (ap->flags2 & XFSMNT2_FILESTREAMS)
-               mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
-       if (ap->flags & XFSMNT_DMAPI)
-               mp->m_flags |= XFS_MOUNT_DMAPI;
-       return 0;
-}
-
 /*
  * This function fills in xfs_mount_t fields based on mount args.
  * Note: the superblock _has_ now been read in.
  */
 STATIC int
 xfs_finish_flags(
-       struct xfs_mount_args   *ap,
        struct xfs_mount        *mp)
 {
        int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 
-       /* Fail a mount where the logbuf is smaller then the log stripe */
+       /* Fail a mount where the logbuf is smaller than the log stripe */
        if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-               if ((ap->logbufsize <= 0) &&
-                   (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+               if (mp->m_logbsize <= 0 &&
+                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
                        mp->m_logbsize = mp->m_sb.sb_logsunit;
-               } else if (ap->logbufsize > 0 &&
-                          ap->logbufsize < mp->m_sb.sb_logsunit) {
+               } else if (mp->m_logbsize > 0 &&
+                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
                        cmn_err(CE_WARN,
        "XFS: logbuf size must be greater than or equal to log stripe size");
                        return XFS_ERROR(EINVAL);
                }
        } else {
                /* Fail a mount if the logbuf is larger than 32K */
-               if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
                        cmn_err(CE_WARN,
        "XFS: logbuf size for version 1 logs must be 16K or 32K");
                        return XFS_ERROR(EINVAL);
@@ -1528,7 +1405,7 @@ xfs_finish_flags(
         * told by noattr2 to turn it off
         */
        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-           !(ap->flags & XFSMNT_NOATTR2))
+           !(mp->m_flags & XFS_MOUNT_NOATTR2))
                mp->m_flags |= XFS_MOUNT_ATTR2;
 
        /*
@@ -1540,61 +1417,9 @@ xfs_finish_flags(
                return XFS_ERROR(EROFS);
        }
 
-       /*
-        * check for shared mount.
-        */
-       if (ap->flags & XFSMNT_SHARED) {
-               if (!xfs_sb_version_hasshared(&mp->m_sb))
-                       return XFS_ERROR(EINVAL);
-
-               /*
-                * For IRIX 6.5, shared mounts must have the shared
-                * version bit set, have the persistent readonly
-                * field set, must be version 0 and can only be mounted
-                * read-only.
-                */
-               if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
-                    (mp->m_sb.sb_shared_vn != 0))
-                       return XFS_ERROR(EINVAL);
-
-               mp->m_flags |= XFS_MOUNT_SHARED;
-
-               /*
-                * Shared XFS V0 can't deal with DMI.  Return EINVAL.
-                */
-               if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
-                       return XFS_ERROR(EINVAL);
-       }
-
-       if (ap->flags & XFSMNT_UQUOTA) {
-               mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-               if (ap->flags & XFSMNT_UQUOTAENF)
-                       mp->m_qflags |= XFS_UQUOTA_ENFD;
-       }
-
-       if (ap->flags & XFSMNT_GQUOTA) {
-               mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-               if (ap->flags & XFSMNT_GQUOTAENF)
-                       mp->m_qflags |= XFS_OQUOTA_ENFD;
-       } else if (ap->flags & XFSMNT_PQUOTA) {
-               mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-               if (ap->flags & XFSMNT_PQUOTAENF)
-                       mp->m_qflags |= XFS_OQUOTA_ENFD;
-       }
-
        return 0;
 }
 
-/*
- * The file system configurations are:
- *     (1) device (partition) with data and internal log
- *     (2) logical volume with data and log subvolumes.
- *     (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
- */
 STATIC int
 xfs_fs_fill_super(
        struct super_block      *sb,
@@ -1603,13 +1428,16 @@ xfs_fs_fill_super(
 {
        struct inode            *root;
        struct xfs_mount        *mp = NULL;
-       struct xfs_mount_args   *args = xfs_args_allocate(sb, silent);
-       struct block_device     *ddev = sb->s_bdev;
-       struct block_device     *logdev = NULL, *rtdev = NULL;
-       int                     flags = 0, error;
+       int                     flags = 0, error = ENOMEM;
+       char                    *mtpt = NULL;
 
-       mp = xfs_mount_init();
+       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+       if (!mp)
+               goto out;
 
+       spin_lock_init(&mp->m_sb_lock);
+       mutex_init(&mp->m_growlock);
+       atomic_set(&mp->m_active_trans, 0);
        INIT_LIST_HEAD(&mp->m_sync_list);
        spin_lock_init(&mp->m_sync_lock);
        init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1617,127 +1445,57 @@ xfs_fs_fill_super(
        mp->m_super = sb;
        sb->s_fs_info = mp;
 
-       if (sb->s_flags & MS_RDONLY)
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-
-       error = xfs_parseargs(mp, (char *)data, args, 0);
+       error = xfs_parseargs(mp, (char *)data, &mtpt);
        if (error)
-               goto fail_vfsop;
+               goto out_free_fsname;
 
        sb_min_blocksize(sb, BBSIZE);
+       sb->s_xattr = xfs_xattr_handlers;
        sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
        sb->s_qcop = &xfs_quotactl_operations;
+#endif
        sb->s_op = &xfs_super_operations;
 
-       error = xfs_dmops_get(mp, args);
-       if (error)
-               goto fail_vfsop;
-       error = xfs_qmops_get(mp, args);
+       error = xfs_dmops_get(mp);
        if (error)
-               goto fail_vfsop;
+               goto out_free_fsname;
 
-       if (args->flags & XFSMNT_QUIET)
+       if (silent)
                flags |= XFS_MFSI_QUIET;
 
-       /*
-        * Open real time and log devices - order is important.
-        */
-       if (args->logname[0]) {
-               error = xfs_blkdev_get(mp, args->logname, &logdev);
-               if (error)
-                       goto fail_vfsop;
-       }
-       if (args->rtname[0]) {
-               error = xfs_blkdev_get(mp, args->rtname, &rtdev);
-               if (error) {
-                       xfs_blkdev_put(logdev);
-                       goto fail_vfsop;
-               }
-
-               if (rtdev == ddev || rtdev == logdev) {
-                       cmn_err(CE_WARN,
-       "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
-                       xfs_blkdev_put(logdev);
-                       xfs_blkdev_put(rtdev);
-                       error = EINVAL;
-                       goto fail_vfsop;
-               }
-       }
+       error = xfs_open_devices(mp);
+       if (error)
+               goto out_put_dmops;
 
-       /*
-        * Setup xfs_mount buffer target pointers
-        */
-       error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
-       if (!mp->m_ddev_targp) {
-               xfs_blkdev_put(logdev);
-               xfs_blkdev_put(rtdev);
-               goto fail_vfsop;
-       }
-       if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
-               if (!mp->m_rtdev_targp) {
-                       xfs_blkdev_put(logdev);
-                       xfs_blkdev_put(rtdev);
-                       goto error0;
-               }
-       }
-       mp->m_logdev_targp = (logdev && logdev != ddev) ?
-                               xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
-       if (!mp->m_logdev_targp) {
-               xfs_blkdev_put(logdev);
-               xfs_blkdev_put(rtdev);
-               goto error0;
-       }
+       if (xfs_icsb_init_counters(mp))
+               mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
 
-       /*
-        * Setup flags based on mount(2) options and then the superblock
-        */
-       error = xfs_start_flags(args, mp);
-       if (error)
-               goto error1;
        error = xfs_readsb(mp, flags);
        if (error)
-               goto error1;
-       error = xfs_finish_flags(args, mp);
-       if (error)
-               goto error2;
+               goto out_destroy_counters;
 
-       /*
-        * Setup xfs_mount buffer target pointers based on superblock
-        */
-       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-                                   mp->m_sb.sb_sectsize);
-       if (!error && logdev && logdev != ddev) {
-               unsigned int    log_sector_size = BBSIZE;
+       error = xfs_finish_flags(mp);
+       if (error)
+               goto out_free_sb;
 
-               if (xfs_sb_version_hassector(&mp->m_sb))
-                       log_sector_size = mp->m_sb.sb_logsectsize;
-               error = xfs_setsize_buftarg(mp->m_logdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           log_sector_size);
-       }
-       if (!error && rtdev)
-               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           mp->m_sb.sb_sectsize);
+       error = xfs_setup_devices(mp);
        if (error)
-               goto error2;
+               goto out_free_sb;
 
        if (mp->m_flags & XFS_MOUNT_BARRIER)
                xfs_mountfs_check_barriers(mp);
 
        error = xfs_filestream_mount(mp);
        if (error)
-               goto error2;
+               goto out_free_sb;
 
-       error = xfs_mountfs(mp, flags);
+       error = xfs_mountfs(mp);
        if (error)
-               goto error2;
+               goto out_filestream_unmount;
 
-       XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
+       XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
 
-       sb->s_dirt = 1;
        sb->s_magic = XFS_SB_MAGIC;
        sb->s_blocksize = mp->m_sb.sb_blocksize;
        sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1745,7 +1503,7 @@ xfs_fs_fill_super(
        sb->s_time_gran = 1;
        set_posix_acl_flag(sb);
 
-       root = igrab(mp->m_rootip->i_vnode);
+       root = igrab(VFS_I(mp->m_rootip));
        if (!root) {
                error = ENOENT;
                goto fail_unmount;
@@ -1760,33 +1518,28 @@ xfs_fs_fill_super(
                goto fail_vnrele;
        }
 
-       mp->m_sync_work.w_syncer = xfs_sync_worker;
-       mp->m_sync_work.w_mount = mp;
-       mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
-       if (IS_ERR(mp->m_sync_task)) {
-               error = -PTR_ERR(mp->m_sync_task);
+       error = xfs_syncd_init(mp);
+       if (error)
                goto fail_vnrele;
-       }
-
-       xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
 
-       kmem_free(args);
+       kfree(mtpt);
        return 0;
 
- error2:
-       if (mp->m_sb_bp)
-               xfs_freesb(mp);
- error1:
-       xfs_binval(mp->m_ddev_targp);
-       if (logdev && logdev != ddev)
-               xfs_binval(mp->m_logdev_targp);
-       if (rtdev)
-               xfs_binval(mp->m_rtdev_targp);
- error0:
-       xfs_unmountfs_close(mp, NULL);
-       xfs_qmops_put(mp);
+ out_filestream_unmount:
+       xfs_filestream_unmount(mp);
+ out_free_sb:
+       xfs_freesb(mp);
+ out_destroy_counters:
+       xfs_icsb_destroy_counters(mp);
+       xfs_close_devices(mp);
+ out_put_dmops:
        xfs_dmops_put(mp);
-       goto fail_vfsop;
+ out_free_fsname:
+       xfs_free_fsname(mp);
+       kfree(mtpt);
+       kfree(mp);
+ out:
+       return -error;
 
  fail_vnrele:
        if (sb->s_root) {
@@ -1805,19 +1558,9 @@ xfs_fs_fill_super(
        xfs_filestream_unmount(mp);
 
        XFS_bflush(mp->m_ddev_targp);
-       error = xfs_unmount_flush(mp, 0);
-       WARN_ON(error);
-
-       IRELE(mp->m_rootip);
 
-       xfs_unmountfs(mp, NULL);
-       xfs_qmops_put(mp);
-       xfs_dmops_put(mp);
-       kmem_free(mp);
-
- fail_vfsop:
-       kmem_free(args);
-       return -error;
+       xfs_unmountfs(mp);
+       goto out_free_sb;
 }
 
 STATIC int
@@ -1832,28 +1575,20 @@ xfs_fs_get_sb(
                           mnt);
 }
 
-static struct super_operations xfs_super_operations = {
+static const struct super_operations xfs_super_operations = {
        .alloc_inode            = xfs_fs_alloc_inode,
        .destroy_inode          = xfs_fs_destroy_inode,
+       .dirty_inode            = xfs_fs_dirty_inode,
        .write_inode            = xfs_fs_write_inode,
        .clear_inode            = xfs_fs_clear_inode,
        .put_super              = xfs_fs_put_super,
-       .write_super            = xfs_fs_write_super,
-       .sync_fs                = xfs_fs_sync_super,
-       .write_super_lockfs     = xfs_fs_lockfs,
+       .sync_fs                = xfs_fs_sync_fs,
+       .freeze_fs              = xfs_fs_freeze,
        .statfs                 = xfs_fs_statfs,
        .remount_fs             = xfs_fs_remount,
        .show_options           = xfs_fs_show_options,
 };
 
-static struct quotactl_ops xfs_quotactl_operations = {
-       .quota_sync             = xfs_fs_quotasync,
-       .get_xstate             = xfs_fs_getxstate,
-       .set_xstate             = xfs_fs_setxstate,
-       .get_xquota             = xfs_fs_getxquota,
-       .set_xquota             = xfs_fs_setxquota,
-};
-
 static struct file_system_type xfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "xfs",
@@ -1862,55 +1597,208 @@ static struct file_system_type xfs_fs_type = {
        .fs_flags               = FS_REQUIRES_DEV,
 };
 
+STATIC int __init
+xfs_init_zones(void)
+{
+
+       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+       if (!xfs_ioend_zone)
+               goto out;
+
+       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+                                                 xfs_ioend_zone);
+       if (!xfs_ioend_pool)
+               goto out_destroy_ioend_zone;
+
+       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+                                               "xfs_log_ticket");
+       if (!xfs_log_ticket_zone)
+               goto out_destroy_ioend_pool;
+
+       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+                                               "xfs_bmap_free_item");
+       if (!xfs_bmap_free_item_zone)
+               goto out_destroy_log_ticket_zone;
+
+       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+                                               "xfs_btree_cur");
+       if (!xfs_btree_cur_zone)
+               goto out_destroy_bmap_free_item_zone;
+
+       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+                                               "xfs_da_state");
+       if (!xfs_da_state_zone)
+               goto out_destroy_btree_cur_zone;
+
+       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+       if (!xfs_dabuf_zone)
+               goto out_destroy_da_state_zone;
+
+       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+       if (!xfs_ifork_zone)
+               goto out_destroy_dabuf_zone;
+
+       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+       if (!xfs_trans_zone)
+               goto out_destroy_ifork_zone;
+
+       /*
+        * The size of the zone allocated buf log item is the maximum
+        * size possible under XFS.  This wastes a little bit of memory,
+        * but it is much faster.
+        */
+       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+                               (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
+                                 NBWORD) * sizeof(int))), "xfs_buf_item");
+       if (!xfs_buf_item_zone)
+               goto out_destroy_trans_zone;
+
+       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+                                sizeof(xfs_extent_t))), "xfs_efd_item");
+       if (!xfs_efd_zone)
+               goto out_destroy_buf_item_zone;
+
+       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+                               sizeof(xfs_extent_t))), "xfs_efi_item");
+       if (!xfs_efi_zone)
+               goto out_destroy_efd_zone;
+
+       xfs_inode_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+                       xfs_fs_inode_init_once);
+       if (!xfs_inode_zone)
+               goto out_destroy_efi_zone;
+
+       xfs_ili_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+                                       KM_ZONE_SPREAD, NULL);
+       if (!xfs_ili_zone)
+               goto out_destroy_inode_zone;
+
+       return 0;
+
+ out_destroy_inode_zone:
+       kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+       kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+       kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+       kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_trans_zone:
+       kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+       kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+       kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+       kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+       kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+       kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+       mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+       kmem_zone_destroy(xfs_ioend_zone);
+ out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+       kmem_zone_destroy(xfs_ili_zone);
+       kmem_zone_destroy(xfs_inode_zone);
+       kmem_zone_destroy(xfs_efi_zone);
+       kmem_zone_destroy(xfs_efd_zone);
+       kmem_zone_destroy(xfs_buf_item_zone);
+       kmem_zone_destroy(xfs_trans_zone);
+       kmem_zone_destroy(xfs_ifork_zone);
+       kmem_zone_destroy(xfs_dabuf_zone);
+       kmem_zone_destroy(xfs_da_state_zone);
+       kmem_zone_destroy(xfs_btree_cur_zone);
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+       kmem_zone_destroy(xfs_log_ticket_zone);
+       mempool_destroy(xfs_ioend_pool);
+       kmem_zone_destroy(xfs_ioend_zone);
+
+}
 
 STATIC int __init
-init_xfs_fs( void )
+init_xfs_fs(void)
 {
        int                     error;
-       static char             message[] __initdata = KERN_INFO \
-               XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
 
-       printk(message);
+       printk(KERN_INFO XFS_VERSION_STRING " with "
+                        XFS_BUILD_OPTIONS " enabled\n");
 
-       ktrace_init(64);
+       xfs_ioend_init();
+       xfs_dir_startup();
 
        error = xfs_init_zones();
-       if (error < 0)
-               goto undo_zones;
+       if (error)
+               goto out;
+
+       error = xfs_mru_cache_init();
+       if (error)
+               goto out_destroy_zones;
+
+       error = xfs_filestream_init();
+       if (error)
+               goto out_mru_cache_uninit;
 
        error = xfs_buf_init();
-       if (error < 0)
-               goto undo_buffers;
+       if (error)
+               goto out_filestream_uninit;
+
+       error = xfs_init_procfs();
+       if (error)
+               goto out_buf_terminate;
+
+       error = xfs_sysctl_register();
+       if (error)
+               goto out_cleanup_procfs;
 
-       vn_init();
-       xfs_init();
-       uuid_init();
        vfs_initquota();
 
        error = register_filesystem(&xfs_fs_type);
        if (error)
-               goto undo_register;
+               goto out_sysctl_unregister;
        return 0;
 
-undo_register:
+ out_sysctl_unregister:
+       xfs_sysctl_unregister();
+ out_cleanup_procfs:
+       xfs_cleanup_procfs();
+ out_buf_terminate:
        xfs_buf_terminate();
-
-undo_buffers:
+ out_filestream_uninit:
+       xfs_filestream_uninit();
+ out_mru_cache_uninit:
+       xfs_mru_cache_uninit();
+ out_destroy_zones:
        xfs_destroy_zones();
-
-undo_zones:
+ out:
        return error;
 }
 
 STATIC void __exit
-exit_xfs_fs( void )
+exit_xfs_fs(void)
 {
        vfs_exitquota();
        unregister_filesystem(&xfs_fs_type);
-       xfs_cleanup();
+       xfs_sysctl_unregister();
+       xfs_cleanup_procfs();
        xfs_buf_terminate();
+       xfs_filestream_uninit();
+       xfs_mru_cache_uninit();
        xfs_destroy_zones();
-       ktrace_uninit();
 }
 
 module_init(init_xfs_fs);