xfs: kill xfs_qmops
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 34413ce..b06b95c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,6 +43,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_rw.h"
+#include "xfs_quota.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -59,29 +60,14 @@ xfs_sync_inodes_ag(
 {
        xfs_perag_t     *pag = &mp->m_perag[ag];
        int             nr_found;
-       int             first_index = 0;
+       uint32_t        first_index = 0;
        int             error = 0;
        int             last_error = 0;
-       int             fflag = XFS_B_ASYNC;
-       int             lock_flags = XFS_ILOCK_SHARED;
-
-       if (flags & SYNC_DELWRI)
-               fflag = XFS_B_DELWRI;
-       if (flags & SYNC_WAIT)
-               fflag = 0;              /* synchronous overrides all */
-
-       if (flags & SYNC_DELWRI) {
-               /*
-                * We need the I/O lock if we're going to call any of
-                * the flush/inval routines.
-                */
-               lock_flags |= XFS_IOLOCK_SHARED;
-       }
 
        do {
                struct inode    *inode;
-               boolean_t       inode_refed;
                xfs_inode_t     *ip = NULL;
+               int             lock_flags = XFS_ILOCK_SHARED;
 
                /*
                 * use a gang lookup to find the next inode in the tree
@@ -97,23 +83,16 @@ xfs_sync_inodes_ag(
                        break;
                }
 
-               /* update the index for the next lookup */
-               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-
                /*
-                * skip inodes in reclaim. Let xfs_syncsub do that for
-                * us so we don't need to worry.
+                * Update the index for the next lookup. Catch overflows
+                * into the next AG range which can occur if we have inodes
+                * in the last block of the AG and we are currently
+                * pointing to the last inode.
                 */
-               if (xfs_iflags_test(ip, (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
-                       read_unlock(&pag->pag_ici_lock);
-                       continue;
-               }
-
-               /* bad inodes are dealt with elsewhere */
-               inode = VFS_I(ip);
-               if (is_bad_inode(inode)) {
+               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
                        read_unlock(&pag->pag_ici_lock);
-                       continue;
+                       break;
                }
 
                /* nothing to sync during shutdown */
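
The first_index update above has to catch 32-bit overflow: when the inode
just found is the last one in the AG, ip->i_ino + 1 maps to a per-AG inode
number that wraps below the current one, and without the check the walk
would restart from the beginning of the tree instead of terminating. A
small userspace sketch of the same wrap test, where agino() is an
illustrative stand-in for XFS_INO_TO_AGINO():

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Sketch only: the per-AG inode number is the low bits of the 64-bit
 * inode number, so adding 1 to the last inode of an AG wraps the
 * 32-bit index back below the current value.
 */
static uint32_t agino(uint64_t ino, int agino_bits)
{
	return (uint32_t)(ino & ((1ULL << agino_bits) - 1));
}

int main(void)
{
	int bits = 32;				/* assume a 32-bit per-AG space */
	uint64_t last_ino = 0xffffffffULL;	/* last inode in the AG */
	uint32_t cur = agino(last_ino, bits);
	uint32_t first_index = agino(last_ino + 1, bits);

	if (first_index < cur)			/* same test as the patch */
		printf("wrapped (%" PRIu32 " < %" PRIu32 "): stop this AG walk\n",
		       first_index, cur);
	return 0;
}
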
@@ -123,42 +102,47 @@ xfs_sync_inodes_ag(
                }
 
                /*
-                * If we can't get a reference on the VFS_I, the inode must be
-                * in reclaim. If we can get the inode lock without blocking,
-                * it is safe to flush the inode because we hold the tree lock
-                * and xfs_iextract will block right now. Hence if we lock the
-                * inode while holding the tree lock, xfs_ireclaim() is
-                * guaranteed to block on the inode lock we now hold and hence
-                * it is safe to reference the inode until we drop the inode
-                * locks completely.
+                * If we can't get a reference on the inode, it must be
+                * in reclaim. Leave it for the reclaim code to flush.
                 */
-               inode_refed = B_FALSE;
-               if (igrab(inode)) {
-                       read_unlock(&pag->pag_ici_lock);
-                       xfs_ilock(ip, lock_flags);
-                       inode_refed = B_TRUE;
-               } else {
-                       if (!xfs_ilock_nowait(ip, lock_flags)) {
-                               /* leave it to reclaim */
-                               read_unlock(&pag->pag_ici_lock);
-                               continue;
-                       }
+               inode = VFS_I(ip);
+               if (!igrab(inode)) {
                        read_unlock(&pag->pag_ici_lock);
+                       continue;
+               }
+               read_unlock(&pag->pag_ici_lock);
+
+               /* avoid new or bad inodes */
+               if (is_bad_inode(inode) ||
+                   xfs_iflags_test(ip, XFS_INEW)) {
+                       IRELE(ip);
+                       continue;
                }
 
                /*
                 * If we have to flush data or wait for I/O completion
-                * we need to drop the ilock that we currently hold.
-                * If we need to drop the lock, insert a marker if we
-                * have not already done so.
+                * we need to hold the iolock.
                 */
-               if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
-                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                       error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
-                       if (flags & SYNC_IOWAIT)
-                               vn_iowait(ip);
-                       xfs_ilock(ip, XFS_ILOCK_SHARED);
+               if (flags & SYNC_DELWRI) {
+                       if (VN_DIRTY(inode)) {
+                               if (flags & SYNC_TRYLOCK) {
+                                       if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
+                                               lock_flags |= XFS_IOLOCK_SHARED;
+                               } else {
+                                       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+                                       lock_flags |= XFS_IOLOCK_SHARED;
+                               }
+                               if (lock_flags & XFS_IOLOCK_SHARED) {
+                                       error = xfs_flush_pages(ip, 0, -1,
+                                                       (flags & SYNC_WAIT) ? 0
+                                                               : XFS_B_ASYNC,
+                                                       FI_NONE);
+                               }
+                       }
+                       if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
+                               xfs_ioend_wait(ip);
                }
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
 
                if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
                        if (flags & SYNC_WAIT) {
@@ -174,13 +158,7 @@ xfs_sync_inodes_ag(
                                        xfs_ifunlock(ip);
                        }
                }
-
-               if (lock_flags)
-                       xfs_iunlock(ip, lock_flags);
-
-               if (inode_refed) {
-                       IRELE(ip);
-               }
+               xfs_iput(ip, lock_flags);
 
                if (error)
                        last_error = error;
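
The rewritten flush path takes the iolock opportunistically: in
SYNC_TRYLOCK mode a failed trylock simply skips the flush rather than
blocking, and lock_flags records exactly which locks are held so a single
exit path (xfs_iput above) releases precisely what was taken. A minimal
pthread sketch of that pattern; IOLOCK_SHARED and sync_one() are
illustrative names, not the XFS API:

#include <pthread.h>
#include <stdio.h>

#define IOLOCK_SHARED	0x1

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

static void sync_one(int trylock)
{
	int held_flags = 0;

	if (trylock) {
		/* SYNC_TRYLOCK: record the lock only if we got it */
		if (pthread_rwlock_tryrdlock(&iolock) == 0)
			held_flags |= IOLOCK_SHARED;
	} else {
		pthread_rwlock_rdlock(&iolock);
		held_flags |= IOLOCK_SHARED;
	}

	if (held_flags & IOLOCK_SHARED)
		printf("flushing pages under the iolock\n");
	else
		printf("lock contended, skipping flush\n");

	/* common exit: drop only what we actually hold */
	if (held_flags & IOLOCK_SHARED)
		pthread_rwlock_unlock(&iolock);
}

int main(void)
{
	sync_one(1);	/* SYNC_TRYLOCK behaviour */
	sync_one(0);	/* blocking behaviour */
	return 0;
}
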
@@ -340,12 +318,12 @@ xfs_quiesce_data(
 
        /* push non-blocking */
        xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
-       XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+       xfs_qm_sync(mp, SYNC_BDFLUSH);
        xfs_filestream_flush(mp);
 
        /* push and block */
        xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
-       XFS_QM_DQSYNC(mp, SYNC_WAIT);
+       xfs_qm_sync(mp, SYNC_WAIT);
 
        /* write superblock and hoover up shutdown errors */
        error = xfs_sync_fsdata(mp, 0);
@@ -400,7 +378,11 @@ xfs_quiesce_attr(
        /* flush inodes and push all remaining buffers out to disk */
        xfs_quiesce_fs(mp);
 
-       ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+       /*
+        * Just warn here till VFS can correctly support
+        * read-only remount without racing.
+        */
+       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
 
        /* Push the superblock and write an unmount record */
        error = xfs_log_sbcount(mp, 1);
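
The old ASSERT_ALWAYS() brought the machine down if a transaction was
still active here; the new code only warns, because the VFS can race a
transaction against the read-only remount. A tiny userspace sketch of the
semantic difference, with MY_WARN_ON as an illustrative stand-in for the
kernel's WARN_ON():

#include <stdio.h>

/* warn and keep going, instead of asserting and dying */
#define MY_WARN_ON(cond) \
	((cond) ? (fprintf(stderr, "WARNING at %s:%d: %s\n", \
			   __FILE__, __LINE__, #cond), 1) : 0)

int main(void)
{
	int active_trans = 1;	/* pretend a transaction raced remount */

	/* old: ASSERT_ALWAYS(active_trans == 0) would panic here */
	MY_WARN_ON(active_trans != 0);
	printf("remount continues despite the warning\n");
	return 0;
}
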
@@ -423,15 +405,17 @@ STATIC void
 xfs_syncd_queue_work(
        struct xfs_mount *mp,
        void            *data,
-       void            (*syncer)(struct xfs_mount *, void *))
+       void            (*syncer)(struct xfs_mount *, void *),
+       struct completion *completion)
 {
-       struct bhv_vfs_sync_work *work;
+       struct xfs_sync_work *work;
 
-       work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
+       work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
        INIT_LIST_HEAD(&work->w_list);
        work->w_syncer = syncer;
        work->w_data = data;
        work->w_mount = mp;
+       work->w_completion = completion;
        spin_lock(&mp->m_sync_lock);
        list_add_tail(&work->w_list, &mp->m_sync_list);
        spin_unlock(&mp->m_sync_lock);
@@ -445,49 +429,26 @@ xfs_syncd_queue_work(
  * heads, looking about for more room...
  */
 STATIC void
-xfs_flush_inode_work(
+xfs_flush_inodes_work(
        struct xfs_mount *mp,
        void            *arg)
 {
        struct inode    *inode = arg;
-       filemap_flush(inode->i_mapping);
+       xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
+       xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
        iput(inode);
 }
 
 void
-xfs_flush_inode(
+xfs_flush_inodes(
        xfs_inode_t     *ip)
 {
        struct inode    *inode = VFS_I(ip);
+       DECLARE_COMPLETION_ONSTACK(completion);
 
        igrab(inode);
-       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-       delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
-       struct xfs_mount *mp,
-       void            *arg)
-{
-       struct inode    *inode = arg;
-       sync_blockdev(mp->m_super->s_bdev);
-       iput(inode);
-}
-
-void
-xfs_flush_device(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       igrab(inode);
-       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
-       delay(msecs_to_jiffies(500));
+       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
+       wait_for_completion(&completion);
        xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
 
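xfs_flush_inodes() now queues its work item with an on-stack completion
and blocks in wait_for_completion() until xfssyncd has run the work and
called complete(), replacing the old fire-and-forget queueing plus a fixed
500ms delay. A userspace pthread model of that handshake; the completion
struct and helpers mirror the kernel API in shape but are reimplemented
here purely for illustration:

#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t	lock;
	pthread_cond_t	wait;
	int		done;
};

static void init_completion(struct completion *c)
{
	pthread_mutex_init(&c->lock, NULL);
	pthread_cond_init(&c->wait, NULL);
	c->done = 0;
}

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->wait);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->wait, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *worker(void *arg)
{
	struct completion *c = arg;

	printf("worker: flushing inodes\n");	/* the queued syncer */
	complete(c);				/* wake the waiter */
	return NULL;
}

int main(void)
{
	struct completion c;
	pthread_t tid;

	init_completion(&c);
	pthread_create(&tid, NULL, worker, &c);
	wait_for_completion(&c);	/* blocks until the worker signals */
	printf("caller: flush complete\n");
	pthread_join(&tid, NULL);
	return 0;
}
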
@@ -507,7 +468,7 @@ xfs_sync_worker(
                xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
                xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
                /* dgc: errors ignored here */
-               error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+               error = xfs_qm_sync(mp, SYNC_BDFLUSH);
                error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
                if (xfs_log_need_covered(mp))
                        error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
@@ -522,7 +483,7 @@ xfssyncd(
 {
        struct xfs_mount        *mp = arg;
        long                    timeleft;
-       bhv_vfs_sync_work_t     *work, *n;
+       xfs_sync_work_t         *work, *n;
        LIST_HEAD               (tmp);
 
        set_freezable();
@@ -557,6 +518,8 @@ xfssyncd(
                        list_del(&work->w_list);
                        if (work == &mp->m_sync_work)
                                continue;
+                       if (work->w_completion)
+                               complete(work->w_completion);
                        kmem_free(work);
                }
        }
@@ -570,6 +533,7 @@ xfs_syncd_init(
 {
        mp->m_sync_work.w_syncer = xfs_sync_worker;
        mp->m_sync_work.w_mount = mp;
+       mp->m_sync_work.w_completion = NULL;
        mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
        if (IS_ERR(mp->m_sync_task))
                return -PTR_ERR(mp->m_sync_task);
@@ -644,32 +608,147 @@ xfs_reclaim_inode(
        return 0;
 }
 
-int
-xfs_reclaim_inodes(
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+       xfs_inode_t     *ip)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
+
+       read_lock(&pag->pag_ici_lock);
+       spin_lock(&ip->i_flags_lock);
+       radix_tree_tag_set(&pag->pag_ici_root,
+                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+       spin_unlock(&ip->i_flags_lock);
+       read_unlock(&pag->pag_ici_lock);
+       xfs_put_perag(mp, pag);
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
        xfs_mount_t     *mp,
-       int              noblock,
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       radix_tree_tag_clear(&pag->pag_ici_root,
+                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+}
+
+void
+xfs_inode_clear_reclaim_tag(
+       xfs_inode_t     *ip)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
+
+       read_lock(&pag->pag_ici_lock);
+       spin_lock(&ip->i_flags_lock);
+       __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+       spin_unlock(&ip->i_flags_lock);
+       read_unlock(&pag->pag_ici_lock);
+       xfs_put_perag(mp, pag);
+}
+
+STATIC void
+xfs_reclaim_inodes_ag(
+       xfs_mount_t     *mp,
+       int             ag,
+       int             noblock,
        int             mode)
 {
-       xfs_inode_t     *ip, *n;
+       xfs_inode_t     *ip = NULL;
+       xfs_perag_t     *pag = &mp->m_perag[ag];
+       int             nr_found;
+       uint32_t        first_index;
+       int             skipped;
 
 restart:
-       XFS_MOUNT_ILOCK(mp);
-       list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
+       first_index = 0;
+       skipped = 0;
+       do {
+               /*
+                * use a gang lookup to find the next inode in the tree
+                * as the tree is sparse and a gang lookup walks to find
+                * the number of objects requested.
+                */
+               read_lock(&pag->pag_ici_lock);
+               nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+                                       (void**)&ip, first_index, 1,
+                                       XFS_ICI_RECLAIM_TAG);
+
+               if (!nr_found) {
+                       read_unlock(&pag->pag_ici_lock);
+                       break;
+               }
+
+               /*
+                * Update the index for the next lookup. Catch overflows
+                * into the next AG range which can occur if we have inodes
+                * in the last block of the AG and we are currently
+                * pointing to the last inode.
+                */
+               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+                       read_unlock(&pag->pag_ici_lock);
+                       break;
+               }
+
+               /* ignore if already under reclaim */
+               if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
+                       read_unlock(&pag->pag_ici_lock);
+                       continue;
+               }
+
                if (noblock) {
-                       if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
+                       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+                               read_unlock(&pag->pag_ici_lock);
                                continue;
+                       }
                        if (xfs_ipincount(ip) ||
                            !xfs_iflock_nowait(ip)) {
                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                               read_unlock(&pag->pag_ici_lock);
                                continue;
                        }
                }
-               XFS_MOUNT_IUNLOCK(mp);
+               read_unlock(&pag->pag_ici_lock);
+
+               /*
+                * If the reclaim attempt failed, the inode raced into
+                * reclaim after the XFS_IRECLAIM check above. Count it
+                * so this AG walk is restarted after a short delay.
+                */
                if (xfs_reclaim_inode(ip, noblock, mode))
-                       delay(1);
+                       skipped++;
+       } while (nr_found);
+
+       if (skipped) {
+               delay(1);
                goto restart;
        }
-       XFS_MOUNT_IUNLOCK(mp);
+}
+
+int
+xfs_reclaim_inodes(
+       xfs_mount_t     *mp,
+       int              noblock,
+       int             mode)
+{
+       int             i;
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+               if (!mp->m_perag[i].pag_ici_init)
+                       continue;
+               xfs_reclaim_inodes_ag(mp, i, noblock, mode);
+       }
        return 0;
 }
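
The new reclaim walk replaces the mount-wide m_del_inodes list with a tag
in the per-AG inode radix tree: xfs_inode_set_reclaim_tag() marks an
inode, radix_tree_gang_lookup_tag() finds the next marked inode at or
after first_index, and any inode that could not be reclaimed bumps
skipped so the whole AG walk is retried after a delay. A compact
userspace model of that scheme, using a plain array in place of the radix
tree; all names here are illustrative:

#include <stdio.h>

#define NSLOTS		8
#define TAG_RECLAIM	0x1

static int tags[NSLOTS];	/* one tag word per "inode" slot */

/* find the first slot >= index carrying the tag; -1 if none */
static int gang_lookup_tag(int index)
{
	for (int i = index; i < NSLOTS; i++)
		if (tags[i] & TAG_RECLAIM)
			return i;
	return -1;
}

static int reclaim_one(int slot)
{
	static int once;

	/* pretend slot 5 is pinned on the first pass: caller must retry */
	if (slot == 5 && !once++)
		return 1;
	tags[slot] &= ~TAG_RECLAIM;	/* clear tag, like __xfs_inode_clear_reclaim_tag */
	printf("reclaimed slot %d\n", slot);
	return 0;
}

int main(void)
{
	int skipped, slot;

	/* mark some inodes reclaimable, like xfs_inode_set_reclaim_tag() */
	tags[2] = tags[5] = tags[7] = TAG_RECLAIM;

	do {	/* the "goto restart" loop from xfs_reclaim_inodes_ag() */
		int first_index = 0;

		skipped = 0;
		while ((slot = gang_lookup_tag(first_index)) >= 0) {
			first_index = slot + 1;		/* advance the index */
			skipped += reclaim_one(slot);
		}
	} while (skipped);	/* the original delay(1)s before retrying */

	return 0;
}
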